The Effects of Climate Change on the Breeding Success and Timing of African White-backed Vultures

Author

Mieke Deyzel

#remotes::install_github("ropensci/rnoaa")
#remotes::install_github("ropensci/chirps")
#install.packages("GSODR")
#install.packages("AICcmodavg")
#install.packages("tinytex")

Load Libraries

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(MASS)

Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select
library(patchwork)

Attaching package: 'patchwork'

The following object is masked from 'package:MASS':

    area
library(stringr)
library(tibble) 
library(dplyr)
library(knitr)
library(ggplot2)
library(GGally)
library(tidyr)
library(chirps)
library(GSODR)
library(tibble)
library(knitr)
library(AICcmodavg)
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'lme4'

The following object is masked from 'package:AICcmodavg':

    checkConv
library(tinytex)
conflicted::conflict_prefer("select", "dplyr")
[conflicted] Will prefer dplyr::select over any other package.
conflicted::conflict_prefer("filter", "dplyr")
[conflicted] Will prefer dplyr::filter over any other package.
conflicted::conflict_prefer("lag", "dplyr")
[conflicted] Will prefer dplyr::lag over any other package.

Overdispersion Check

check_overdispersion <- function(model) {
  rp <- residuals(model, type = "pearson")
  disp <- sum(rp^2) / model$df.residual
  disp
} 
vultures    <- read.csv("~/Desktop/Vulture Project/Data/93_24 cleaned.csv")
# WeatherData <- read.csv("~/Desktop/Vulture Project/Data/Weather.csv")

Weather Data

library(rnoaa)
Registered S3 method overwritten by 'hoardr':
  method           from
  print.cache_info httr
The rnoaa package will soon be retired and archived because the underlying APIs have changed dramatically. The package currently works but does not pull the most recent data in all cases. A noaaWeather package is planned as a replacement but the functions will not be interchangeable.
library(tidyverse)
library(lubridate)

 
kim_station <- "684380-99999"

weather_raw <- get_GSOD(
  station = kim_station,
  years   = 1993:2024
)
WeatherData <- weather_raw |>
  transmute(
    YEAR,
    MONTH,
    DAY,
    TEMP,
    MAX,
    MIN,
    PRCP,
    WDSP,
    MXSPD,
    I_HAIL
  ) |>
  arrange(YEAR, MONTH, DAY)

CHIRPS Rainfall for 2004

# coordinates  (24.81 E, 28.62 S)

lonlat <- data.frame(
lon = 24.81,
lat = -28.62
)

kimberley_chirps_2004 <- get_chirps(
object   = lonlat,
dates    = c("2004-01-01", "2004-12-31"),
server   = "ClimateSERV",
as.matrix = FALSE
)

Fetching data from ClimateSERV 

Getting your request...
glimpse(kimberley_chirps_2004)
Rows: 366
Columns: 5
$ id     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ lon    <dbl> 24.81, 24.81, 24.81, 24.81, 24.81, 24.81, 24.81, 24.81, 24.81, …
$ lat    <dbl> -28.62, -28.62, -28.62, -28.62, -28.62, -28.62, -28.62, -28.62,…
$ date   <date> 2004-01-01, 2004-01-02, 2004-01-03, 2004-01-04, 2004-01-05, 20…
$ chirps <dbl> 0.000000, 0.000000, 0.000000, 0.000000, 12.762877, 21.295383, 1…

CHIRPS Rainfall for 2004 (Dronfield / Kimberley)

daily_chirps_2004 <- tibble(
date    = as.Date(kimberley_chirps_2004$date),
rain_mm = as.numeric(kimberley_chirps_2004$chirps)
)

head(daily_chirps_2004) 
# A tibble: 6 × 2
  date       rain_mm
  <date>       <dbl>
1 2004-01-01     0  
2 2004-01-02     0  
3 2004-01-03     0  
4 2004-01-04     0  
5 2004-01-05    12.8
6 2004-01-06    21.3
summary(daily_chirps_2004$rain_mm)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   0.00    0.00    0.00    1.26    0.00   28.76 
annual_chirps_2004 <- daily_chirps_2004 |>
mutate(YEAR = year(date)) |>
group_by(YEAR) |>
summarise(
total_rain_2004 = sum(rain_mm, na.rm = TRUE),
max_rain_2004   = max(rain_mm, na.rm = TRUE),
rain_days_2004  = sum(rain_mm > 0, na.rm = TRUE),
.groups = "drop"
)

annual_chirps_2004
# A tibble: 1 × 4
   YEAR total_rain_2004 max_rain_2004 rain_days_2004
  <dbl>           <dbl>         <dbl>          <int>
1  2004            461.          28.8             54
total_rain_2004 <- annual_chirps_2004$total_rain_2004
max_rain_2004   <- annual_chirps_2004$max_rain_2004
rain_days_2004  <- annual_chirps_2004$rain_days_2004

total_rain_2004
[1] 461.1258
max_rain_2004
[1] 28.75726
rain_days_2004
[1] 54

Weather Data

WeatherData_clean <- WeatherData |>
  filter(
    YEAR >= 1993,
    YEAR <= 2024
  ) |>
  mutate(
    date = make_date(YEAR, MONTH, DAY)
  ) |>
  # add CHIRPS daily rainfall into 2004 
  left_join(daily_chirps_2004, by = "date") |>
  mutate(
    PRCP = if_else(YEAR == 2004 & !is.na(rain_mm), rain_mm, PRCP)
  ) |>
  dplyr::select(-rain_mm) 
vultures.clean <- vultures |>
  clean_names() |>                               # ringing_date, laying_date
  rename_with(~ gsub("_", ".", .x)) |>           # ringing.date, laying.dater
  mutate(
    ringing.date = ringing.date |> as.character() |> str_trim(),
    ringing.date = if_else(ringing.date %in% c("", "NA", "?"),
                           NA_character_, ringing.date),
    ringing.date = ymd(gsub("/", "-", ringing.date)),
    
    laying.date  = laying.date |> as.character() |> str_trim(),
    laying.date  = if_else(laying.date %in% c("", "NA", "?"),
                           NA_character_, laying.date),
    laying.date  = ymd(gsub("/", "-", laying.date))
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `ringing.date = ymd(gsub("/", "-", ringing.date))`.
Caused by warning:
!  32 failed to parse.
glimpse(vultures.clean)
Rows: 2,657
Columns: 14
$ ringing.date            <date> 1993-09-21, 1993-09-21, 1993-09-19, 1993-09-1…
$ tree.drn                <chr> "1", "2", "3", "4", "5", "6", "8", "12", "13",…
$ southings               <chr> " 28 38 43.8", "28 38 48.2", "28 38 55.6", "28…
$ eastings                <chr> "24 47 14.5", "24 47 05.1", "24 47 15.4", "24 …
$ code                    <int> 1, 1, 1, 1, 1, 1, 1, 15, 1, 1, 1, 1, 3, 1, 5, …
$ ring                    <chr> "G19791", "G19792", "G19781", "G19780", "G1978…
$ colour.rings.r.top.down <chr> "M", "M", "M", "M", "M", "M", "M", "", "M", "M…
$ colour.rings.l.top.down <chr> "B, Y, Y", "B, Y, R", "B, W, G", "B, B, R", "B…
$ mass.g                  <int> 5900, 5000, 3900, 4950, 4950, 3800, 3550, NA, …
$ wing.length.mm          <int> 475, 425, 260, 310, 290, 275, 210, NA, 245, 29…
$ age.at.ringing.days     <int> 89, 80, 57, 64, 61, 59, 49, NA, 55, 61, 5, 92,…
$ value.at.ringing        <int> 34233, 34233, 34231, 34231, 34231, 34231, 3423…
$ hatching.date           <chr> "1993/06/24", "1993/07/03", "1993/07/24", "199…
$ laying.date             <date> 1993-04-29, 1993-05-08, 1993-05-29, 1993-05-2…
summary(vultures.clean)
  ringing.date          tree.drn          southings           eastings        
 Min.   :1993-09-19   Length:2657        Length:2657        Length:2657       
 1st Qu.:2004-06-06   Class :character   Class :character   Class :character  
 Median :2012-10-13   Mode  :character   Mode  :character   Mode  :character  
 Mean   :2011-08-31                                                           
 3rd Qu.:2019-08-21                                                           
 Max.   :2024-10-18                                                           
 NA's   :32                                                                   
      code             ring           colour.rings.r.top.down
 Min.   :  1.000   Length:2657        Length:2657            
 1st Qu.:  1.000   Class :character   Class :character       
 Median :  1.000   Mode  :character   Mode  :character       
 Mean   :  5.001                                             
 3rd Qu.:  6.000                                             
 Max.   :138.000                                             
 NA's   :4                                                   
 colour.rings.l.top.down     mass.g     wing.length.mm  age.at.ringing.days
 Length:2657             Min.   :   2   Min.   : 25.0   Min.   :  5.00     
 Class :character        1st Qu.:4200   1st Qu.:320.0   1st Qu.: 65.00     
 Mode  :character        Median :4800   Median :395.0   Median : 76.00     
                         Mean   :4610   Mean   :375.7   Mean   : 74.18     
                         3rd Qu.:5200   3rd Qu.:460.0   3rd Qu.: 86.00     
                         Max.   :6700   Max.   :570.0   Max.   :300.00     
                         NA's   :1225   NA's   :1192    NA's   :1198       
 value.at.ringing hatching.date       laying.date        
 Min.   :34231    Length:2657        Min.   :1900-01-02  
 1st Qu.:37541    Class :character   1st Qu.:2002-06-02  
 Median :40824    Mode  :character   Median :2011-05-13  
 Mean   :40489                       Mean   :2010-05-21  
 3rd Qu.:43386                       3rd Qu.:2018-05-26  
 Max.   :45583                       Max.   :2024-08-11  
 NA's   :1231                        NA's   :1197        
colSums(is.na(vultures.clean))
           ringing.date                tree.drn               southings 
                     32                       0                       0 
               eastings                    code                    ring 
                      0                       4                       0 
colour.rings.r.top.down colour.rings.l.top.down                  mass.g 
                      0                       0                    1225 
         wing.length.mm     age.at.ringing.days        value.at.ringing 
                   1192                    1198                    1231 
          hatching.date             laying.date 
                      0                    1197 

Active Nests per year

active.nests.raw <- vultures.clean |>
mutate(year = year(ringing.date)) |>
group_by(year) |>
summarise(active_nests = n(), .groups = "drop")

ggplot(active.nests.raw, aes(x = year, y = active_nests)) +
geom_col(fill = "#89c5d6", alpha = 0.85) +
labs(x = "Year", y = "Number of active nests") +
theme_bw() +
theme(panel.grid = element_blank())
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_col()`).

Success proportion

failed.nests.raw <- vultures.clean |>
mutate(year = year(ringing.date)) |>
group_by(year) |>
summarise(
active_nests  = n(),
success_nests = sum(!is.na(laying.date)),
success_prop  = success_nests / active_nests,
.groups       = "drop"
)

ggplot(failed.nests.raw, aes(x = success_prop)) +
geom_histogram(bins = 15, fill = "#89c5d6") +
labs(x = "Breeding success proportion", y = "Count") +
theme_bw() +
theme(panel.grid = element_blank())

Annual Weather Summaries

weather_yearly <- WeatherData_clean |>
group_by(YEAR) |>
summarise(
mean_temp  = mean(TEMP, na.rm = TRUE),
max_temp   = max(MAX, na.rm = TRUE),
min_temp   = min(MIN, na.rm = TRUE),
total_rain = sum(PRCP, na.rm = TRUE),
max_rain   = max(PRCP, na.rm = TRUE),
rain_days  = sum(PRCP > 0, na.rm = TRUE),
mean_wind  = mean(WDSP, na.rm = TRUE),
max_wind   = max(MXSPD, na.rm = TRUE),
hail_days  = sum(I_HAIL, na.rm = TRUE),
.groups    = "drop"
)

# Check the corrected 2004 rainfall

weather_yearly[weather_yearly$YEAR == 2004,
c("YEAR", "total_rain", "max_rain", "rain_days")]
# A tibble: 1 × 4
   YEAR total_rain max_rain rain_days
  <int>      <dbl>    <dbl>     <int>
1  2004       461.     28.8        54

Plot Annual Rainfall

ggplot(weather_yearly, aes(x = YEAR, y = total_rain)) +
geom_line() +
geom_point() +
theme_bw() +
labs(x = "Year", y = "Total annual rainfall (mm)")

Rain over time

library(grid) 

rain_yearly <- weather_yearly

rain_long <- rain_yearly |>
pivot_longer(
cols = c(total_rain, max_rain, rain_days),
names_to = "metric",
values_to = "value"
) |>
mutate(
metric = factor(
metric,
levels = c("total_rain", "max_rain", "rain_days")
),
metric_label = case_when(
metric == "total_rain" ~ "Total annual rainfall (mm)",
metric == "max_rain"   ~ "Max daily rainfall (mm)",
metric == "rain_days"  ~ "Number of rainy days (> 0 mm)"
),
metric_label = factor(
metric_label,
levels = c(
"Total annual rainfall (mm)",
"Max daily rainfall (mm)",
"Number of rainy days (> 0 mm)"
)
)
)

ggplot(rain_long, aes(x = YEAR, y = value)) +
geom_line(linewidth = 0.7) +
geom_point(size = 1.3) +
facet_wrap(~ metric_label, ncol = 1, scales = "free_y") +
theme_classic() +
labs(
x = "Year",
y = NULL
) +
theme(
strip.text    = element_text(size = 9, face = "bold"),
axis.title.x  = element_text(size = 10),
axis.text     = element_text(size = 8),
panel.spacing = unit(0.7, "lines")
)

Check min temp

# Yearly 

ggplot(weather_yearly, aes(x = YEAR, y = min_temp)) +
geom_line() +
geom_point() +
theme_bw() +
labs(x = "Year", y = "Annual minimum temperature (°C)")

# Daily 

weather_daily <- WeatherData_clean |>
group_by(YEAR, MONTH, DAY) |>
summarise(
mean_temp  = mean(TEMP, na.rm = TRUE),
max_temp   = max(MAX,  na.rm = TRUE),
min_temp   = min(MIN,  na.rm = TRUE),
total_rain = sum(PRCP, na.rm = TRUE),
max_rain   = max(PRCP, na.rm = TRUE),
rain_days  = sum(PRCP > 0, na.rm = TRUE),
mean_wind  = mean(WDSP, na.rm = TRUE),
max_wind   = max(MXSPD, na.rm = TRUE),
hail_days  = sum(I_HAIL, na.rm = TRUE),
.groups    = "drop"
)
Warning: There were 291 warnings in `summarise()`.
The first warning was:
ℹ In argument: `max_temp = max(MAX, na.rm = TRUE)`.
ℹ In group 9935: `YEAR = 2020`, `MONTH = 6`, `DAY = 15`.
Caused by warning in `max()`:
! no non-missing arguments to max; returning -Inf
ℹ Run `dplyr::last_dplyr_warnings()` to see the 290 remaining warnings.
temp_daily <- weather_daily |>
mutate(
min_temp = ifelse(is.infinite(min_temp), NA_real_, min_temp),
max_temp = ifelse(is.infinite(max_temp), NA_real_, max_temp)
)

summary(temp_daily$min_temp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
 -9.900   4.500  10.750   9.818  15.500  30.000       9 
summary(temp_daily$max_temp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
   5.50   23.70   28.60   28.18   32.90   45.70       1 

Yearly temp summaries

weather_yearly_temp <- weather_daily |>
filter(YEAR >= 1993, YEAR <= 2024) |>
group_by(YEAR) |>
summarise(
mean_temp = mean(mean_temp, na.rm = TRUE),
max_temp  = max(max_temp, na.rm = TRUE),
min_temp  = min(min_temp, na.rm = TRUE),
.groups   = "drop"
)

summary(weather_yearly_temp$min_temp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 -9.900  -7.425  -6.450  -6.356  -5.450  -3.300 
summary(weather_yearly_temp$max_temp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  37.20   38.35   39.50   40.01   41.17   45.70 

Temps over time

min_temp <- ggplot(weather_yearly_temp, aes(YEAR, min_temp)) +
geom_line() +
geom_point() +
theme_bw() +
labs(x = "Year", y = "Annual minimum temperature (°C)")

max_temp <- ggplot(weather_yearly_temp, aes(x = YEAR, y = max_temp)) +
geom_line() +
geom_point() +
theme_bw() +
labs(
x = "Year",
y = "Annual maximum temperature (°C)",
title = "Annual Maximum Temperature (1993–2024)"
)

mean_temp <- ggplot(weather_yearly_temp, aes(x = YEAR, y = mean_temp)) +
geom_line() +
geom_point() +
theme_bw() +
labs(
x = "Year",
y = "Annual mean temperature (°C)",
title = "Annual Mean Temperature (1993–2024)"
)
temp_yearly_long <- weather_yearly_temp |>
pivot_longer(
cols = c(max_temp, min_temp, mean_temp),
names_to = "type",
values_to = "value"
) |>
mutate(
type = factor(
type,
levels = c("max_temp", "min_temp", "mean_temp"),
labels = c("Annual Maximum Temperature (°C)",
"Annual Minimum Temperature (°C)",
"Annual Mean Temperature (°C)")
)
)

ggplot(temp_yearly_long, aes(x = YEAR, y = value)) +
geom_line(size = 0.7) +
geom_point(size = 1.5) +
facet_wrap(~ type, scales = "free_y", ncol = 1) +
theme_classic(base_size = 11) +
theme(
strip.text   = element_text(size = 10, face = "bold"),
axis.title   = element_text(size = 9),
axis.text    = element_text(size = 8),
panel.spacing = unit(0.8, "lines")
) +
labs(
x = "Year",
y = "",
title = "Annual Temperature Trends (1993–2024)"
)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Lagged Weather summaries

weather_yearly_lag <- weather_yearly |>
arrange(YEAR) |>
mutate(
across(
c(mean_temp, max_temp, min_temp, total_rain, max_rain,
rain_days, mean_wind, max_wind, hail_days),
~ lag(.x, 1), .names = "lag_{.col}"
)
)

Daily weather with lagged variables

weather_lag <- WeatherData_clean |>
mutate(
mean_temp = readr::parse_number(as.character(TEMP)),
max_temp  = readr::parse_number(as.character(MAX)),
min_temp  = readr::parse_number(as.character(MIN)),
mean_wind = readr::parse_number(as.character(WDSP)),
max_wind  = readr::parse_number(as.character(MXSPD)),
rain      = readr::parse_number(as.character(PRCP)),
rain_day  = as.integer(replace_na(rain, 0) > 0),
hail      = I_HAIL
) |>
arrange(date)

Check for weather correlation

weather_yearly |>
  dplyr::select(
    mean_temp, max_temp, min_temp,
    total_rain, max_rain, rain_days,
    mean_wind, max_wind, hail_days
  )
# A tibble: 32 × 9
   mean_temp max_temp min_temp total_rain max_rain rain_days mean_wind max_wind
       <dbl>    <dbl>    <dbl>      <dbl>    <dbl>     <int>     <dbl>    <dbl>
 1      18.4     39.1     -7.8       568.    111          67      4.05     15.4
 2      17.8     39.6     -8         378.     37.3        39      3.75     13.9
 3      18.5     39.1     -7.2       567.     81.3        70      3.79     18  
 4      17.7     37.5     -6.5       516.     39.1        79      3.98     14.9
 5      18.5     40.9     -3.3       381.     33.0        65      3.76     13.9
 6      18.7     37.8     -6         550.     32          84      4.00     20.6
 7      19.4     37.9     -5         354.     42.2        62      3.95     28.3
 8      18.0     37.2     -8.1       578.     78.5        88      3.61     20.6
 9      19.0     39.9     -6.4       548.     27.4       105      3.78     27.8
10      19.4     43.6     -6.6       579.     44.7        83      3.71     24.2
# ℹ 22 more rows
# ℹ 1 more variable: hail_days <dbl>
library(Hmisc)


vars <- weather_yearly |>
  dplyr::select(mean_temp, max_temp, min_temp, total_rain, max_rain,
                rain_days, mean_wind, max_wind, hail_days)

res <- rcorr(as.matrix(vars), type = "pearson")

cor_mat <- res$r
p_mat   <- res$P

cor_df <- as.data.frame(cor_mat) |>
  rownames_to_column("var1") |>
  pivot_longer(-var1, names_to = "var2", values_to = "cor") |>
  mutate(p = as.vector(p_mat))

cor_df <- cor_df |>
  mutate(sig = case_when(
    is.na(p) ~ "",
    p < 0.001 ~ "***",
    p < 0.01  ~ "**",
    p < 0.05  ~ "*",
    TRUE      ~ ""
  ))
ggplot(cor_df, aes(x = var1, y = var2, fill = cor)) +
  geom_tile() +
  geom_text(aes(label = sig), size = 4) +
  scale_fill_gradient2(
    low = "#b2182b", mid = "white", high = "#2166ac",
    midpoint = 0, limits = c(-1, 1)
  ) +
  labs(x = "", y = "", fill = "Correlation") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid  = element_blank()
  )

All weather across years

weather_yearly |>
pivot_longer(-YEAR) |>
ggplot(aes(x = YEAR, y = value)) +
geom_line(colour = "#89c5d6") +
facet_wrap(~ name, scales = "free_y") +
labs(x = "Year", y = "Value") +
theme_bw() +
theme(panel.grid = element_blank())

# Effort Data 

active.nests <- vultures.clean |>
mutate(year = year(ringing.date)) |>
group_by(year) |>
summarise(active_nests = n(), .groups = "drop")

effort_weather <- active.nests |>
left_join(weather_yearly, by = c("year" = "YEAR"))

Year only model

m_year <- glm.nb(active_nests ~ year, data = active.nests)
summary(m_year)

Call:
glm.nb(formula = active_nests ~ year, data = active.nests, init.theta = 283.4337529, 
    link = log)

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept) -53.188260   4.927882  -10.79   <2e-16 ***
year          0.028659   0.002451   11.69   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(283.4338) family taken to be 1)

    Null deviance: 173.368  on 31  degrees of freedom
Residual deviance:  34.003  on 30  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 246.56

Number of Fisher Scoring iterations: 1

              Theta:  283 
          Std. Err.:  329 

 2 x log-likelihood:  -240.558 
check_overdispersion(m_year)
[1] 1.067024

Autocorrelation in residuals

res_effort <- residuals(m_year, type = "pearson")

acf(res_effort,
main = "ACF of residuals: breeding effort (active nests)")

Rainfall

effort_weather <- effort_weather |>
mutate(
z_total_rain = scale(total_rain),
z_max_rain   = scale(max_rain),
z_rain_days  = scale(rain_days)
)

m_rain_only <- glm.nb(
active_nests ~ z_total_rain + z_max_rain + z_rain_days,
data = effort_weather
)
summary(m_rain_only)

Call:
glm.nb(formula = active_nests ~ z_total_rain + z_max_rain + z_rain_days, 
    data = effort_weather, init.theta = 18.0307885, link = log)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)   4.39736    0.04606  95.461   <2e-16 ***
z_total_rain -0.18087    0.07301  -2.477   0.0132 *  
z_max_rain   -0.01308    0.05642  -0.232   0.8167    
z_rain_days   0.11469    0.06637   1.728   0.0840 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(18.0308) family taken to be 1)

    Null deviance: 41.511  on 31  degrees of freedom
Residual deviance: 32.342  on 28  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 294.97

Number of Fisher Scoring iterations: 1

              Theta:  18.03 
          Std. Err.:  5.51 

 2 x log-likelihood:  -284.973 

Temp

effort_weather <- effort_weather |>
mutate(
z_mean_temp = scale(mean_temp),
z_max_temp  = scale(max_temp),
z_min_temp  = scale(min_temp)
)

max_temp_mod <- glm.nb(
active_nests ~ z_max_temp,
data = effort_weather
)
summary(max_temp_mod)

Call:
glm.nb(formula = active_nests ~ z_max_temp, data = effort_weather, 
    init.theta = 15.77748683, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  4.40154    0.04864  90.494   <2e-16 ***
z_max_temp   0.10583    0.04911   2.155   0.0312 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(15.7775) family taken to be 1)

    Null deviance: 37.216  on 31  degrees of freedom
Residual deviance: 32.494  on 30  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 294.66

Number of Fisher Scoring iterations: 1

              Theta:  15.78 
          Std. Err.:  4.72 

 2 x log-likelihood:  -288.663 
min_temp_mod <- glm.nb(
active_nests ~ z_min_temp,
data = effort_weather
)
summary(min_temp_mod)

Call:
glm.nb(formula = active_nests ~ z_min_temp, data = effort_weather, 
    init.theta = 13.63264473, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  4.40648    0.05171  85.218   <2e-16 ***
z_min_temp   0.03703    0.05254   0.705    0.481    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(13.6326) family taken to be 1)

    Null deviance: 32.929  on 31  degrees of freedom
Residual deviance: 32.466  on 30  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 298.61

Number of Fisher Scoring iterations: 1

              Theta:  13.63 
          Std. Err.:  3.97 

 2 x log-likelihood:  -292.606 
mean_temp_mod <- glm.nb(
active_nests ~ z_mean_temp,
data = effort_weather
)
summary(mean_temp_mod)

Call:
glm.nb(formula = active_nests ~ z_mean_temp, data = effort_weather, 
    init.theta = 13.63790192, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  4.40648    0.05170  85.233   <2e-16 ***
z_mean_temp  0.03478    0.05251   0.662    0.508    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(13.6379) family taken to be 1)

    Null deviance: 32.939  on 31  degrees of freedom
Residual deviance: 32.476  on 30  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 298.61

Number of Fisher Scoring iterations: 1

              Theta:  13.64 
          Std. Err.:  3.97 

 2 x log-likelihood:  -292.605 

Wind

effort_weather <- effort_weather |>
mutate(
z_mean_wind = scale(mean_wind),
z_max_wind  = scale(max_wind)
)

m_wind_only <- glm.nb(
active_nests ~ z_mean_wind + z_max_wind,
data = effort_weather
)
summary(m_wind_only)

Call:
glm.nb(formula = active_nests ~ z_mean_wind + z_max_wind, data = effort_weather, 
    init.theta = 15.33069655, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  4.40243    0.04922  89.438   <2e-16 ***
z_mean_wind -0.06706    0.05215  -1.286    0.199    
z_max_wind  -0.05428    0.05231  -1.038    0.299    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(15.3307) family taken to be 1)

    Null deviance: 36.340  on 31  degrees of freedom
Residual deviance: 32.489  on 29  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 297.43

Number of Fisher Scoring iterations: 1

              Theta:  15.33 
          Std. Err.:  4.56 

 2 x log-likelihood:  -289.431 

Hail

effort_weather <- effort_weather |>
mutate(z_hail_days = scale(hail_days))

m_hail_only <- glm.nb(
active_nests ~ z_hail_days,
data = effort_weather
)
summary(m_hail_only)

Call:
glm.nb(formula = active_nests ~ z_hail_days, data = effort_weather, 
    init.theta = 15.4472834, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  4.40226    0.04907  89.721   <2e-16 ***
z_hail_days  0.09695    0.04926   1.968    0.049 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(15.4473) family taken to be 1)

    Null deviance: 36.569  on 31  degrees of freedom
Residual deviance: 32.511  on 30  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 295.25

Number of Fisher Scoring iterations: 1

              Theta:  15.45 
          Std. Err.:  4.60 

 2 x log-likelihood:  -289.249 

Active nests across years

active.nests <- vultures.clean |>
mutate(year = year(ringing.date)) |>
group_by(year) |>
summarise(active_nests = n(), .groups = "drop")

m_year <- glm.nb(active_nests ~ year, data = active.nests)

active.nests_plot <- active.nests |>
mutate(pred_year = predict(m_year, newdata = active.nests, type = "response"))

ggplot(active.nests_plot, aes(x = year, y = active_nests)) +
geom_col(fill = "#89c5d6", alpha = 0.85) +
geom_line(aes(y = pred_year), linewidth = 1.1, colour = "black") +
labs(
x = "Year",
y = "Number of active nests"
) +
theme_bw() +
theme(panel.grid = element_blank())
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_col()`).
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_line()`).

Effects of max temp on effort

effort_weather <- active.nests |>
left_join(weather_yearly, by = c("year" = "YEAR")) |>
mutate(
z_total_rain = as.numeric(scale(total_rain)),
z_max_rain   = as.numeric(scale(max_rain)),
z_rain_days  = as.numeric(scale(rain_days)),
z_mean_temp  = as.numeric(scale(mean_temp)),
z_max_temp   = as.numeric(scale(max_temp)),
z_min_temp   = as.numeric(scale(min_temp)),
z_mean_wind  = as.numeric(scale(mean_wind)),
z_max_wind   = as.numeric(scale(max_wind)),
z_hail_days  = as.numeric(scale(hail_days))
)

m_eff_maxtemp <- glm.nb(
active_nests ~ z_max_temp,
data = effort_weather
)
summary(m_eff_maxtemp)

Call:
glm.nb(formula = active_nests ~ z_max_temp, data = effort_weather, 
    init.theta = 15.77748683, link = log)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  4.40154    0.04864  90.494   <2e-16 ***
z_max_temp   0.10583    0.04911   2.155   0.0312 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for Negative Binomial(15.7775) family taken to be 1)

    Null deviance: 37.216  on 31  degrees of freedom
Residual deviance: 32.494  on 30  degrees of freedom
  (1 observation deleted due to missingness)
AIC: 294.66

Number of Fisher Scoring iterations: 1

              Theta:  15.78 
          Std. Err.:  4.72 

 2 x log-likelihood:  -288.663 
new_eff_temp <- tibble(
z_max_temp = seq(
min(effort_weather$z_max_temp, na.rm = TRUE),
max(effort_weather$z_max_temp, na.rm = TRUE),
length.out = 100
)
) |>
mutate(
pred_nests = predict(
m_eff_maxtemp,
newdata = cur_data(),
type = "response"
)
)
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `pred_nests = predict(m_eff_maxtemp, newdata = cur_data(), type
  = "response")`.
Caused by warning:
! `cur_data()` was deprecated in dplyr 1.1.0.
ℹ Please use `pick()` instead.
ggplot(effort_weather, aes(x = z_max_temp, y = active_nests)) +
geom_point(alpha = 0.7) +
geom_line(
data = new_eff_temp,
aes(x = z_max_temp, y = pred_nests),
linewidth = 1.1
) +
labs(
x = "Annual maximum temperature (scaled)",
y = "Number of active nests"
) +
theme_bw() +
theme(panel.grid = element_blank())
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

acf(active.nests$active_nests)

Success DF

failed.nests <- vultures.clean |>
mutate(year = year(ringing.date)) |>
filter(!is.na(year)) |>
group_by(year) |>
summarise(
active_nests  = n(),
success_nests = sum(!is.na(laying.date)),
failed_nests  = active_nests - success_nests,
success_prop  = success_nests / active_nests,
.groups       = "drop"
) |>
filter(active_nests > 0)

Year only model

success_year <- glm(
cbind(success_nests, failed_nests) ~ year,
family = binomial,
data   = failed.nests
)
summary(success_year)

Call:
glm(formula = cbind(success_nests, failed_nests) ~ year, family = binomial, 
    data = failed.nests)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) 45.434641   8.858715   5.129 2.92e-07 ***
year        -0.022505   0.004405  -5.109 3.24e-07 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 107.689  on 31  degrees of freedom
Residual deviance:  81.283  on 30  degrees of freedom
AIC: 237.58

Number of Fisher Scoring iterations: 3
check_overdispersion(success_year)
[1] 2.670817

Rain

success_df <- failed.nests |>
left_join(weather_yearly, by = c("year" = "YEAR")) |>
mutate(
z_total_rain = as.numeric(scale(total_rain)),
z_max_rain   = as.numeric(scale(max_rain)),
z_rain_days  = as.numeric(scale(rain_days))
)

success_rain <- glm(
cbind(success_nests, failed_nests) ~
z_total_rain + z_max_rain + z_rain_days,
family = binomial,
data   = success_df
)
summary(success_rain)

Call:
glm(formula = cbind(success_nests, failed_nests) ~ z_total_rain + 
    z_max_rain + z_rain_days, family = binomial, data = success_df)

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept)   0.18710    0.03966   4.717 2.39e-06 ***
z_total_rain  0.08304    0.06426   1.292   0.1963    
z_max_rain    0.02901    0.05086   0.570   0.5685    
z_rain_days  -0.09864    0.05967  -1.653   0.0983 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 107.69  on 31  degrees of freedom
Residual deviance: 102.75  on 28  degrees of freedom
AIC: 263.05

Number of Fisher Scoring iterations: 3
check_overdispersion(success_rain)
[1] 3.581363

Temp

success_df <- success_df |>
mutate(
z_mean_temp = as.numeric(scale(mean_temp)),
z_max_temp  = as.numeric(scale(max_temp)),
z_min_temp  = as.numeric(scale(min_temp))
)

success_minT <- glm(
cbind(success_nests, failed_nests) ~ z_min_temp,
family = binomial,
data   = success_df
)
summary(success_minT)

Call:
glm(formula = cbind(success_nests, failed_nests) ~ z_min_temp, 
    family = binomial, data = success_df)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  0.17203    0.03931   4.376 1.21e-05 ***
z_min_temp   0.16071    0.04191   3.834 0.000126 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 107.689  on 31  degrees of freedom
Residual deviance:  92.877  on 30  degrees of freedom
AIC: 249.18

Number of Fisher Scoring iterations: 3
check_overdispersion(success_minT)
[1] 3.031976
success_maxT <- glm(
cbind(success_nests, failed_nests) ~ z_max_temp,
family = binomial,
data   = success_df
)
summary(success_maxT)

Call:
glm(formula = cbind(success_nests, failed_nests) ~ z_max_temp, 
    family = binomial, data = success_df)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  0.18310    0.03944   4.643 3.44e-06 ***
z_max_temp  -0.06103    0.03839  -1.590    0.112    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 107.69  on 31  degrees of freedom
Residual deviance: 105.16  on 30  degrees of freedom
AIC: 261.46

Number of Fisher Scoring iterations: 3
check_overdispersion(success_maxT)
[1] 3.433179
success_meanT <- glm(
cbind(success_nests, failed_nests) ~ z_mean_temp,
family = binomial,
data   = success_df
)
summary(success_meanT)

Call:
glm(formula = cbind(success_nests, failed_nests) ~ z_mean_temp, 
    family = binomial, data = success_df)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  0.17461    0.03922   4.452 8.51e-06 ***
z_mean_temp  0.05587    0.03856   1.449    0.147    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 107.69  on 31  degrees of freedom
Residual deviance: 105.59  on 30  degrees of freedom
AIC: 261.89

Number of Fisher Scoring iterations: 3
check_overdispersion(success_meanT)
[1] 3.4372

WInd

success_df <- success_df |>
mutate(
z_mean_wind = as.numeric(scale(mean_wind)),
z_max_wind  = as.numeric(scale(max_wind))
)

m_success_wind <- glm(
cbind(success_nests, failed_nests) ~ z_mean_wind + z_max_wind,
family = binomial,
data   = success_df
)
summary(m_success_wind)

Call:
glm(formula = cbind(success_nests, failed_nests) ~ z_mean_wind + 
    z_max_wind, family = binomial, data = success_df)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  0.17646    0.03937   4.482  7.4e-06 ***
z_mean_wind  0.01436    0.04176   0.344    0.731    
z_max_wind  -0.01688    0.04351  -0.388    0.698    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 107.69  on 31  degrees of freedom
Residual deviance: 107.48  on 29  degrees of freedom
AIC: 265.78

Number of Fisher Scoring iterations: 3
check_overdispersion(m_success_wind)
[1] 3.625179

Hail

success_df <- success_df |>
mutate(
z_hail_days = as.numeric(scale(hail_days))
)

m_success_hail <- glm(
cbind(success_nests, failed_nests) ~ z_hail_days,
family = binomial,
data   = success_df
)
summary(m_success_hail)

Call:
glm(formula = cbind(success_nests, failed_nests) ~ z_hail_days, 
    family = binomial, data = success_df)

Coefficients:
            Estimate Std. Error z value Pr(>|z|)    
(Intercept)  0.18275    0.03940   4.638 3.52e-06 ***
z_hail_days -0.05984    0.03647  -1.641    0.101    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 107.69  on 31  degrees of freedom
Residual deviance: 105.00  on 30  degrees of freedom
AIC: 261.3

Number of Fisher Scoring iterations: 3
check_overdispersion(m_success_hail)
[1] 3.424451

60 and 30 day pre laying period

timing_df <- vultures.clean |>
filter(!is.na(laying.date)) |>
mutate(
year    = year(laying.date),
lay_DOY = yday(laying.date)
)

summarise_60 <- function(lay_date, weather) {
w <- weather |>
filter(date > (lay_date - days(60)), date <= lay_date)

tibble(
mean_temp_60  = mean(w$mean_temp, na.rm = TRUE),
max_temp_60   = mean(w$max_temp, na.rm = TRUE),
min_temp_60   = mean(w$min_temp, na.rm = TRUE),
total_rain_60 = sum(w$rain, na.rm = TRUE),
rain_days_60  = sum(w$rain_day, na.rm = TRUE),
mean_wind_60  = mean(w$mean_wind, na.rm = TRUE),
max_wind_60   = mean(w$max_wind, na.rm = TRUE),
hail_days_60  = sum(w$hail, na.rm = TRUE)
)
}

timing_weather_60 <- timing_df |>
mutate(win60 = purrr::map(laying.date, summarise_60, weather = weather_lag)) |>
unnest(win60)

summarise_30 <- function(lay_date, weather) {
  w <- weather |>
    filter(date > (lay_date - days(30)), date <= lay_date)

  tibble(
    mean_temp_30  = mean(w$mean_temp, na.rm = TRUE),
    max_temp_30   = mean(w$max_temp,  na.rm = TRUE),
    min_temp_30   = mean(w$min_temp,  na.rm = TRUE),
    total_rain_30 = sum(w$rain,       na.rm = TRUE),
    rain_days_30  = sum(w$rain_day,   na.rm = TRUE),
    mean_wind_30  = mean(w$mean_wind, na.rm = TRUE),
    max_wind_30   = mean(w$max_wind,  na.rm = TRUE),
    hail_days_30  = sum(w$hail,       na.rm = TRUE)
  )
}

timing_weather_30 <- timing_df |>
  mutate(win30 = purrr::map(laying.date, summarise_30, weather = weather_lag)) |>
  tidyr::unnest(win30)

Temp

m_timing_temp_60 <- lm(
lay_DOY ~ mean_temp_60 + max_temp_60 + min_temp_60,
data = timing_weather_60
)
summary(m_timing_temp_60)

Call:
lm(formula = lay_DOY ~ mean_temp_60 + max_temp_60 + min_temp_60, 
    data = timing_weather_60)

Residuals:
    Min      1Q  Median      3Q     Max 
-88.511  -5.887  -1.339   4.323 145.827 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  230.2640     4.2240  54.514  < 2e-16 ***
mean_temp_60   2.6343     0.6348   4.150 3.52e-05 ***
max_temp_60   -3.0285     0.3587  -8.443  < 2e-16 ***
min_temp_60   -5.5674     0.3681 -15.123  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 10.13 on 1455 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.6752,    Adjusted R-squared:  0.6745 
F-statistic:  1008 on 3 and 1455 DF,  p-value: < 2.2e-16

Rain

m_timing_rain_60 <- lm(
lay_DOY ~ total_rain_60 + rain_days_60,
data = timing_weather_60
)
summary(m_timing_rain_60)

Call:
lm(formula = lay_DOY ~ total_rain_60 + rain_days_60, data = timing_weather_60)

Residuals:
     Min       1Q   Median       3Q      Max 
-166.729   -9.039   -2.407    7.930  134.078 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)   168.72873    0.85555 197.217  < 2e-16 ***
total_rain_60  -0.06469    0.01242  -5.207 2.19e-07 ***
rain_days_60   -0.96694    0.09618 -10.054  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 15.69 on 1457 degrees of freedom
Multiple R-squared:  0.2568,    Adjusted R-squared:  0.2557 
F-statistic: 251.7 on 2 and 1457 DF,  p-value: < 2.2e-16

Hail

m_timing_rain_60 <- lm(
lay_DOY ~ total_rain_60 + rain_days_60,
data = timing_weather_60
)
summary(m_timing_rain_60)

Call:
lm(formula = lay_DOY ~ total_rain_60 + rain_days_60, data = timing_weather_60)

Residuals:
     Min       1Q   Median       3Q      Max 
-166.729   -9.039   -2.407    7.930  134.078 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)   168.72873    0.85555 197.217  < 2e-16 ***
total_rain_60  -0.06469    0.01242  -5.207 2.19e-07 ***
rain_days_60   -0.96694    0.09618 -10.054  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 15.69 on 1457 degrees of freedom
Multiple R-squared:  0.2568,    Adjusted R-squared:  0.2557 
F-statistic: 251.7 on 2 and 1457 DF,  p-value: < 2.2e-16

Test

plot(failed.nests$year, failed.nests$success_prop)


Objective 1

objective1_table <- tribble(
  ~`Hypothesis / justification`, ~Model,

  # ---- Temperature ----
  "**Temp**", NA,
  "Warmer years increase energetic cost of adult birds, reducing likelihood to breed.",
  "active_nests ~ mean_temp",

  "Extreme heat events cause thermal stress, reducing breeding effort (active cooling is energetically costly and may reduce adult condition).",
  "active_nests ~ max_temp",

  "Cold extremes increase thermoregulatory demand (often increasing time spent foraging), reducing effort put into breeding",
  "active_nests ~ min_temp",

  # ---- Rain ----
  "**Rain**", NA,
  "High rainfall reduces foraging efficiency and adult condition, reducing breeding effort.",
  "active_nests ~ total_rain",

  "Prolonged rainfall conditions reduce breeding effort by limiting foraging opportunity (fewer or weaker thermals for soaring)",
  "active_nests ~ rain_days",

  "Intense rainfall events decrease foraging efficiency and nest attendance, reducing breeding effort.",
  "active_nests ~ max_rain",

  "Previous year’s rainfall influences food availability and adult condition, affecting breeding effort in the following year.",
  "active_nests ~ lag_total_rain",

  # ---- Wind ----
  "**Wind**", NA,
  "Persistent windy conditions increase the cost of flying, limiting foraging and the energy available for reproduction, reducing breeding effort",
  "active_nests ~ mean_wind",

  "Extreme winds increase nest damage risk, reducing the likelihood that adults will attempt breeding (WBV often reuse nests from previous years) ",
  "active_nests ~ max_wind",

  # ---- Hail ----
  "**Hail**", NA,
  "Severe storm events increase disturbance and nest damage risk, reducing breeding effort",
  "active_nests ~ hail_days",

  # ---- Joint Effects ----
  "**Joint effects**", NA,
  "Cold stress combined with prolonged wet conditions jointly limit breeding effort",
  "active_nests ~ min_temp + total_rain",

  "Cold stress combined with intense rainfall events reduces breeding effort",
  "active_nests ~ min_temp + max_rain",

  "Cold extremes combined with extreme wind increase energetic costs and breeding risk, reducing breeding effort",
  "active_nests ~ min_temp + max_wind",

  "Severe storm exposure combined with extreme wind increases nest disturbance, reducing breeding effort",
  "active_nests ~ hail_days + max_wind",

  "Reduced thermals for soaring combined with high flight costs limit breeding effort",
  "active_nests ~ max_rain + max_wind",
  
  "Heat stress combined with prolonged wet conditions jointly reduce breeding effort by increasing energetic costs while limiting foraging",
  "active_nests ~ max_temp + rain_days",
  
 " High annual rainfall and frequent hail events each impose energetic and disturbance costs that reduce breeding effort.",
"active_nests ~ hail_days + rain_days",

  # ---- Interactive Effects ----
  "**Interactive effects**", NA,
  "The negative effect of cold extremes on breeding effort is increased in persistently wet years.",
  "active_nests ~ min_temp * rain_days",

  "Cold-related energetic stress is amplified in windy years due to increased heat loss and flight costs",
  "active_nests ~ min_temp * mean_wind",

  "Adult condition resulting from the previous year interacts with current-year heat stress to influence breeding effort",
  "active_nests ~ lag_total_rain * max_temp"
)

objective1_table_clean <- objective1_table |>
  dplyr::mutate(Model = ifelse(is.na(Model), "", Model))

kable(
  objective1_table_clean,
  align = c("l", "l"),
  caption = "Objective 1 candidate model set for breeding effort (active nests)"
)
Objective 1 candidate model set for breeding effort (active nests)
Hypothesis / justification Model
Temp
Warmer years increase energetic cost of adult birds, reducing likelihood to breed. active_nests ~ mean_temp
Extreme heat events cause thermal stress, reducing breeding effort (active cooling is energetically costly and may reduce adult condition). active_nests ~ max_temp
Cold extremes increase thermoregulatory demand (often increasing time spent foraging), reducing effort put into breeding active_nests ~ min_temp
Rain
High rainfall reduces foraging efficiency and adult condition, reducing breeding effort. active_nests ~ total_rain
Prolonged rainfall conditions reduce breeding effort by limiting foraging opportunity (fewer or weaker thermals for soaring) active_nests ~ rain_days
Intense rainfall events decrease foraging efficiency and nest attendance, reducing breeding effort. active_nests ~ max_rain
Previous year’s rainfall influences food availability and adult condition, affecting breeding effort in the following year. active_nests ~ lag_total_rain
Wind
Persistent windy conditions increase the cost of flying, limiting foraging and the energy available for reproduction, reducing breeding effort active_nests ~ mean_wind
Extreme winds increase nest damage risk, reducing the likelihood that adults will attempt breeding (WBV often reuse nests from previous years) active_nests ~ max_wind
Hail
Severe storm events increase disturbance and nest damage risk, reducing breeding effort active_nests ~ hail_days
Joint effects
Cold stress combined with prolonged wet conditions jointly limit breeding effort active_nests ~ min_temp + total_rain
Cold stress combined with intense rainfall events reduces breeding effort active_nests ~ min_temp + max_rain
Cold extremes combined with extreme wind increase energetic costs and breeding risk, reducing breeding effort active_nests ~ min_temp + max_wind
Severe storm exposure combined with extreme wind increases nest disturbance, reducing breeding effort active_nests ~ hail_days + max_wind
Reduced thermals for soaring combined with high flight costs limit breeding effort active_nests ~ max_rain + max_wind
Heat stress combined with prolonged wet conditions jointly reduce breeding effort by increasing energetic costs while limiting foraging active_nests ~ max_temp + rain_days
High annual rainfall and frequent hail events each impose energetic and disturbance costs that reduce breeding effort. active_nests ~ hail_days + rain_days
Interactive effects
The negative effect of cold extremes on breeding effort is increased in persistently wet years. active_nests ~ min_temp * rain_days
Cold-related energetic stress is amplified in windy years due to increased heat loss and flight costs active_nests ~ min_temp * mean_wind
Adult condition resulting from the previous year interacts with current-year heat stress to influence breeding effort active_nests ~ lag_total_rain * max_temp
# Annual effort: number of active nests per year
active.nests <- vultures.clean |>
  mutate(year = year(ringing.date)) |>
  group_by(year) |>
  summarise(active_nests = n(), .groups = "drop")


# Join to weather, create log effort response
effort_weather <- active.nests |>
  left_join(weather_yearly_lag, by = c("year" = "YEAR")) |>
  filter(!is.na(lag_total_rain)) |>
  mutate(
    log_active_nests = log(active_nests)
  )
# checking residuals
par(mfrow = c(1, 2))

Fitingt then no-year models

# Baseline 
m_null <- lm(log_active_nests ~ 1, data = effort_weather)

# Single predictor models 
m_mean_temp      <- lm(log_active_nests ~ mean_temp,      data = effort_weather)
m_max_temp       <- lm(log_active_nests ~ max_temp,       data = effort_weather)
m_min_temp       <- lm(log_active_nests ~ min_temp,       data = effort_weather)

m_total_rain     <- lm(log_active_nests ~ total_rain,     data = effort_weather)
m_rain_days      <- lm(log_active_nests ~ rain_days,      data = effort_weather)
m_max_rain       <- lm(log_active_nests ~ max_rain,       data = effort_weather)
m_lag_total_rain <- lm(log_active_nests ~ lag_total_rain, data = effort_weather)

m_mean_wind      <- lm(log_active_nests ~ mean_wind,      data = effort_weather)
m_max_wind       <- lm(log_active_nests ~ max_wind,       data = effort_weather)
m_hail_days      <- lm(log_active_nests ~ hail_days,      data = effort_weather)

# Joint effects 
m_minT_totalR    <- lm(log_active_nests ~ min_temp + total_rain,  data = effort_weather)
m_minT_maxR      <- lm(log_active_nests ~ min_temp + max_rain,    data = effort_weather)
m_minT_maxW      <- lm(log_active_nests ~ min_temp + max_wind,    data = effort_weather)
m_hail_maxW      <- lm(log_active_nests ~ hail_days + max_wind,   data = effort_weather)
m_maxR_maxW      <- lm(log_active_nests ~ max_rain + max_wind,    data = effort_weather)
m_maxT_rainDays  <- lm(log_active_nests ~ max_temp + rain_days,   data = effort_weather)
m_hail_rainDays  <- lm(log_active_nests ~ hail_days + rain_days,  data = effort_weather)

# Interactions 
m_minT_x_rainDays <- lm(log_active_nests ~ min_temp * rain_days,      data = effort_weather)
m_minT_x_meanWind <- lm(log_active_nests ~ min_temp * mean_wind,      data = effort_weather)
m_lagR_x_maxT     <- lm(log_active_nests ~ lag_total_rain * max_temp, data = effort_weather)

Fit with year models

# Baseline 
m_year <- lm(log_active_nests ~ year, data = effort_weather)

# Single predictor models 
m_mean_temp_year      <- lm(log_active_nests ~ year + mean_temp,      data = effort_weather)
m_max_temp_year       <- lm(log_active_nests ~ year + max_temp,       data = effort_weather)
m_min_temp_year       <- lm(log_active_nests ~ year + min_temp,       data = effort_weather)

m_total_rain_year     <- lm(log_active_nests ~ year + total_rain,     data = effort_weather)
m_rain_days_year      <- lm(log_active_nests ~ year + rain_days,      data = effort_weather)
m_max_rain_year       <- lm(log_active_nests ~ year + max_rain,       data = effort_weather)
m_lag_total_rain_year <- lm(log_active_nests ~ year + lag_total_rain, data = effort_weather)

m_mean_wind_year      <- lm(log_active_nests ~ year + mean_wind,      data = effort_weather)
m_max_wind_year       <- lm(log_active_nests ~ year + max_wind,       data = effort_weather)
m_hail_days_year      <- lm(log_active_nests ~ year + hail_days,      data = effort_weather)

# Joint effects 
m_minT_totalR_year    <- lm(log_active_nests ~ year + min_temp + total_rain, data = effort_weather)
m_minT_maxR_year      <- lm(log_active_nests ~ year + min_temp + max_rain,   data = effort_weather)
m_minT_maxW_year      <- lm(log_active_nests ~ year + min_temp + max_wind,   data = effort_weather)
m_hail_maxW_year      <- lm(log_active_nests ~ year + hail_days + max_wind,  data = effort_weather)
m_maxR_maxW_year      <- lm(log_active_nests ~ year + max_rain + max_wind,   data = effort_weather)
m_maxT_rainDays_year  <- lm(log_active_nests ~ year + max_temp + rain_days,  data = effort_weather)
m_hail_rainDays_year  <- lm(log_active_nests ~ year + hail_days + rain_days, data = effort_weather)

# Interactions 
m_minT_x_rainDays_year <- lm(log_active_nests ~ year + min_temp * rain_days,      data = effort_weather)
m_minT_x_meanWind_year <- lm(log_active_nests ~ year + min_temp * mean_wind,      data = effort_weather)
m_lagR_x_maxT_year     <- lm(log_active_nests ~ year + lag_total_rain * max_temp, data = effort_weather)

Two separate AICc model selection runs

# AIC 1: without year
cand_effort_no_year <- list(
  null            = m_null,

  mean_temp       = m_mean_temp,
  max_temp        = m_max_temp,
  min_temp        = m_min_temp,
  total_rain      = m_total_rain,
  rain_days       = m_rain_days,
  max_rain        = m_max_rain,
  lag_total_rain  = m_lag_total_rain,
  mean_wind       = m_mean_wind,
  max_wind        = m_max_wind,
  hail_days       = m_hail_days,

  minT_totalR     = m_minT_totalR,
  minT_maxR       = m_minT_maxR,
  minT_maxW       = m_minT_maxW,
  hail_maxW       = m_hail_maxW,
  maxR_maxW       = m_maxR_maxW,
  maxT_rainDays   = m_maxT_rainDays,
  hail_rainDays   = m_hail_rainDays,

  minT_x_rainDays = m_minT_x_rainDays,
  minT_x_meanWind = m_minT_x_meanWind,
  lagR_x_maxT     = m_lagR_x_maxT
)

aic_effort_no_year <- aictab(cand.set = cand_effort_no_year, modnames = names(cand_effort_no_year))
aic_effort_no_year

Model selection based on AICc:

                K  AICc Delta_AICc AICcWt Cum.Wt    LL
hail_days       3 12.04       0.00   0.17   0.17 -2.57
max_temp        3 12.77       0.74   0.12   0.29 -2.94
total_rain      3 12.83       0.79   0.12   0.41 -2.97
hail_maxW       4 13.16       1.12   0.10   0.51 -1.81
null            2 14.26       2.22   0.06   0.57 -4.92
maxT_rainDays   4 14.37       2.33   0.05   0.62 -2.41
max_wind        3 14.39       2.35   0.05   0.68 -3.75
hail_rainDays   4 14.44       2.40   0.05   0.73 -2.45
mean_wind       3 14.84       2.81   0.04   0.77 -3.98
lag_total_rain  3 15.07       3.03   0.04   0.81 -4.09
minT_totalR     4 15.45       3.41   0.03   0.84 -2.96
lagR_x_maxT     5 15.71       3.67   0.03   0.87 -1.65
max_rain        3 15.74       3.70   0.03   0.89 -4.43
maxR_maxW       4 15.84       3.81   0.03   0.92 -3.15
mean_temp       3 16.46       4.42   0.02   0.94 -4.79
min_temp        3 16.61       4.58   0.02   0.96 -4.86
rain_days       3 16.72       4.68   0.02   0.97 -4.92
minT_maxW       4 17.03       5.00   0.01   0.99 -3.75
minT_maxR       4 18.24       6.20   0.01   1.00 -4.35
minT_x_meanWind 5 20.27       8.23   0.00   1.00 -3.93
minT_x_rainDays 5 21.95       9.92   0.00   1.00 -4.78
# AIC 2: with year
cand_effort_with_year <- list(
  year               = m_year,

  mean_temp          = m_mean_temp_year,
  max_temp           = m_max_temp_year,
  min_temp           = m_min_temp_year,
  total_rain         = m_total_rain_year,
  rain_days          = m_rain_days_year,
  max_rain           = m_max_rain_year,
  lag_total_rain     = m_lag_total_rain_year,
  mean_wind          = m_mean_wind_year,
  max_wind           = m_max_wind_year,
  hail_days          = m_hail_days_year,

  minT_totalR        = m_minT_totalR_year,
  minT_maxR          = m_minT_maxR_year,
  minT_maxW          = m_minT_maxW_year,
  hail_maxW          = m_hail_maxW_year,
  maxR_maxW          = m_maxR_maxW_year,
  maxT_rainDays      = m_maxT_rainDays_year,
  hail_rainDays      = m_hail_rainDays_year,

  minT_x_rainDays    = m_minT_x_rainDays_year,
  minT_x_meanWind    = m_minT_x_meanWind_year,
  lagR_x_maxT        = m_lagR_x_maxT_year
)

aic_effort_with_year <- aictab(cand.set = cand_effort_with_year, modnames = names(cand_effort_with_year))
aic_effort_with_year

Model selection based on AICc:

                K   AICc Delta_AICc AICcWt Cum.Wt    LL
max_rain        4 -29.07       0.00   0.19   0.19 19.31
year            3 -28.43       0.64   0.14   0.33 17.66
total_rain      4 -27.49       1.58   0.09   0.42 18.51
minT_maxR       5 -27.44       1.63   0.09   0.51 19.92
min_temp        4 -26.68       2.39   0.06   0.56 18.11
mean_temp       4 -26.59       2.48   0.06   0.62 18.06
max_temp        4 -26.42       2.65   0.05   0.67 17.98
maxR_maxW       5 -26.41       2.66   0.05   0.72 19.40
max_wind        4 -25.90       3.18   0.04   0.76 17.72
hail_days       4 -25.88       3.19   0.04   0.80 17.71
mean_wind       4 -25.87       3.21   0.04   0.84 17.70
lag_total_rain  4 -25.81       3.27   0.04   0.88 17.67
rain_days       4 -25.78       3.29   0.04   0.92 17.66
minT_totalR     5 -25.04       4.03   0.03   0.94 18.72
minT_maxW       5 -23.83       5.24   0.01   0.96 18.11
maxT_rainDays   5 -23.78       5.29   0.01   0.97 18.09
hail_maxW       5 -23.12       5.95   0.01   0.98 17.76
hail_rainDays   5 -23.03       6.04   0.01   0.99 17.72
lagR_x_maxT     6 -21.36       7.71   0.00   0.99 18.43
minT_x_meanWind 6 -21.30       7.77   0.00   1.00 18.40
minT_x_rainDays 6 -21.04       8.03   0.00   1.00 18.27

Check residuals

# identify top-ranked model (no year)
best_no_year_name <- aic_effort_no_year$Modnames[1]
best_no_year_E <- cand_effort_no_year[[best_no_year_name]]

par(mfrow = c(2,2))
plot(best_no_year_E)

# identify top-ranked model (with year)
best_with_year_name <- aic_effort_with_year$Modnames[1]
best_with_year_E <- cand_effort_with_year[[best_with_year_name]]

par(mfrow = c(2,2))
plot(best_with_year_E)

# year-only baseline and best with-year model
summary(m_year)$adj.r.squared
[1] 0.758928
summary(best_with_year_E)$adj.r.squared
[1] 0.7754688
# best no-year model
summary(best_no_year_E)$adj.r.squared
[1] 0.1105514

Figures

ggplot(effort_weather, aes(x = year, y = active_nests)) +
  geom_point(
    size = 2.4,
    colour = "#B1BD8C",
    alpha = 0.9
  ) +
  geom_smooth(
    method = "lm",
    se = TRUE,
    colour = "#D5D6D2",
    fill   = "#D5D6D2",
    alpha  = 0.25,
    linewidth = 1.8
  ) +
  scale_x_continuous(
    breaks = seq(
      min(effort_weather$year),
      max(effort_weather$year),
      by = 5
    )
  ) +
  labs(
    x = "Year",
    y = "Number of active nests"
  ) +
  theme_bw(base_family = "Times New Roman") +
  theme(
    panel.grid = element_blank(),
    axis.title = element_text(face = "bold", size = 12),
    axis.text  = element_text(face = "bold", size = 10)
  )
`geom_smooth()` using formula = 'y ~ x'

prep_aic_plot <- function(aic_obj, set_label){
  df <- as.data.frame(aic_obj)

  # model names
  if ("Modnames" %in% names(df)) df$model <- df$Modnames
  if (!"model" %in% names(df)) df$model <- rownames(df)

  df |>
    dplyr::mutate(set = set_label) |>
    dplyr::rename(delta = Delta_AICc, weight = AICcWt) |>
    dplyr::select(set, model, delta, weight)
}

aic_plot_df <- dplyr::bind_rows(
  prep_aic_plot(aic_effort_with_year, "With year"),
  prep_aic_plot(aic_effort_no_year, "Without year")
) |>
  dplyr::filter(delta <= 3) |>
  dplyr::group_by(set) |>
  dplyr::mutate(model = forcats::fct_reorder(model, -delta)) |>
  dplyr::ungroup()

ggplot(aic_plot_df, aes(x = delta, y = model)) +
  geom_vline(xintercept = 2, linetype = 2) +
  geom_point(aes(size = weight), colour = "grey20") +
  facet_wrap(~set, scales = "free_y") +
  scale_size_continuous(range = c(2, 8)) +
  labs(
    x = expression(Delta*"AICc (lower is better)"),
    y = NULL,
    title = "Support for candidate models with and without year",
    subtitle = "Only models with ΔAICc ≤ 3 are shown; point size indicates AICc weight"
  ) +
  theme_bw() +
  theme(panel.grid = element_blank())

library(dplyr)
library(ggplot2)
library(forcats)

plot_aic_facets_from_cands <- function(cand_with, cand_without,
                                       label_with = "With year",
                                       label_without = "Without year",
                                       daic_max = 3){

  build_df <- function(cand_list, panel_label){
    tibble(
      model_id = names(cand_list),
      AICc = sapply(cand_list, AICcmodavg::AICc)
    ) |>
      arrange(AICc) |>
      mutate(
        dAICc = AICc - min(AICc),
        weight = exp(-0.5 * dAICc) / sum(exp(-0.5 * dAICc)),
        panel = panel_label
      )
  }

  dat <- bind_rows(
    build_df(cand_with,    label_with),
    build_df(cand_without, label_without)
  ) |>
    filter(is.finite(dAICc), dAICc <= daic_max) |>
    group_by(panel) |>
    arrange(dAICc, .by_group = TRUE) |>
    mutate(model_id = fct_reorder(model_id, dAICc, .desc = TRUE)) |>
    ungroup()

  ggplot(dat, aes(x = dAICc, y = model_id, size = weight)) +
    geom_vline(xintercept = 2, linetype = "dashed") +
    geom_point(alpha = 0.9) +
    facet_wrap(~panel, scales = "free_y") +
    labs(
      x = expression(Delta*AIC[c]*" (lower is better)"),
      y = NULL,
      size = "AICc weight",
      title = "Support for candidate models with and without year",
      subtitle = paste0("Only models with ΔAICc ≤ ", daic_max,
                        " are shown; dashed line = ΔAICc = 2")
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      legend.position = "right"
    )
}
# Table 1 (weather-only)

library(dplyr)
library(tibble)

aic_df <- as.data.frame(aic_effort_no_year)

# model ids
if ("Modnames" %in% names(aic_df)) {
  aic_df <- aic_df |> dplyr::mutate(model_id = Modnames)
} else {
  aic_df <- tibble::rownames_to_column(aic_df, "model_id")
}

table1 <- aic_df |>
  dplyr::rename(
    `ΔAICc` = Delta_AICc,
    w       = AICcWt
  ) |>
  dplyr::select(model_id, K, AICc, `ΔAICc`, w) |>
  dplyr::arrange(`ΔAICc`)

get_stats <- function(mod){
  adjr2 <- summary(mod)$adj.r.squared
  if (length(coef(mod)) == 2) {
    cf <- summary(mod)$coefficients
    slope_se <- paste0(round(cf[2,1], 3), " (", round(cf[2,2], 3), ")")
  } else {
    slope_se <- ""
  }
  tibble::tibble(`Adj. R²` = adjr2, `slope (SE)` = slope_se)
}

stats_df <- lapply(table1$model_id, function(mn) get_stats(cand_effort_no_year[[mn]])) |>
  dplyr::bind_rows() |>
  dplyr::mutate(model_id = table1$model_id)

table1 <- table1 |>
  dplyr::left_join(stats_df, by = "model_id") |>
  dplyr::mutate(
    AICc = round(AICc, 2),
    `ΔAICc` = round(`ΔAICc`, 2),
    w = round(w, 3),
    `Adj. R²` = round(`Adj. R²`, 3)
  )

# grouping and neat names
group_order <- c("Baseline","Rain","Temperature","Wind","Hail","Joint effects","Interactions")

model_group <- function(m){
  dplyr::case_when(
    m == "null" ~ "Baseline",
    m %in% c("total_rain","rain_days","max_rain","lag_total_rain") ~ "Rain",
    m %in% c("mean_temp","max_temp","min_temp") ~ "Temperature",
    m %in% c("mean_wind","max_wind") ~ "Wind",
    m %in% c("hail_days") ~ "Hail",
    m %in% c("minT_totalR","minT_maxR","minT_maxW","hail_maxW","maxR_maxW","maxT_rainDays","hail_rainDays") ~ "Joint effects",
    m %in% c("minT_x_rainDays","minT_x_meanWind","lagR_x_maxT") ~ "Interactions",
    TRUE ~ "Joint effects"
  )
}

pretty_model <- function(x){
  dplyr::recode(
    x,
    "null"            = "Intercept only",
    "total_rain"      = "Total rainfall",
    "rain_days"       = "Rainy days",
    "max_rain"        = "Maximum daily rainfall",
    "lag_total_rain"  = "Previous year's total rainfall",
    "mean_temp"       = "Mean temperature",
    "max_temp"        = "Maximum temperature",
    "min_temp"        = "Minimum temperature",
    "mean_wind"       = "Mean wind speed",
    "max_wind"        = "Maximum wind speed",
    "hail_days"       = "Hail days",
    "minT_totalR"     = "Min temperature + total rainfall",
    "minT_maxR"       = "Min temperature + max daily rainfall",
    "minT_maxW"       = "Min temperature + max wind speed",
    "hail_maxW"       = "Hail days + max wind speed",
    "maxR_maxW"       = "Max daily rainfall + max wind speed",
    "maxT_rainDays"   = "Max temperature + rainy days",
    "hail_rainDays"   = "Hail days + rainy days",
    "minT_x_rainDays" = "Min temperature × rainy days",
    "minT_x_meanWind" = "Min temperature × mean wind speed",
    "lagR_x_maxT"     = "Prev. rainfall × max temperature",
    .default = x
  )
}

table1_clean <- table1 |>
  dplyr::mutate(
    Group = model_group(model_id),
    Model = pretty_model(model_id),
    Group = factor(Group, levels = group_order)
  ) |>
  dplyr::arrange(Group, `ΔAICc`) |>
  dplyr::select(Group, Model, K, AICc, `ΔAICc`, w, `Adj. R²`, `slope (SE)`)

# print (kable + pack_rows)
group_sizes <- table1_clean |>
  dplyr::count(Group, name = "n") |>
  dplyr::mutate(
    start = cumsum(dplyr::lag(n, default = 0)) + 1,
    end   = cumsum(n)
  )

tbl <- knitr::kable(
  table1_clean |> dplyr::select(-Group),
  caption = "Table 1. Weather-only candidate models explaining annual breeding effort (log number of active nests) at Dronfield. K = number of parameters; AICc = small-sample AIC; ΔAICc = difference from the best model; w = Akaike weight. Adjusted R² is shown for all models; slope (SE) is reported only for single-predictor models.",
  align = "l"
) |>
  kableExtra::kable_styling(full_width = FALSE)

for (i in seq_len(nrow(group_sizes))) {
  tbl <- tbl |>
    kableExtra::pack_rows(
      as.character(group_sizes$Group[i]),
      group_sizes$start[i],
      group_sizes$end[i]
    )
}

tbl
Table 1. Weather-only candidate models explaining annual breeding effort (log number of active nests) at Dronfield. K = number of parameters; AICc = small-sample AIC; ΔAICc = difference from the best model; w = Akaike weight. Adjusted R² is shown for all models; slope (SE) is reported only for single-predictor models.
Model K AICc ΔAICc w Adj. R² slope (SE)
Baseline
Intercept only 2 14.26 2.22 0.057 0.000
Rain
Total rainfall 3 12.83 0.79 0.117 0.088 -0.001 (0)
Previous year's total rainfall 3 15.07 3.03 0.038 0.019 0 (0)
Maximum daily rainfall 3 15.74 3.70 0.027 -0.002 -0.003 (0.003)
Rainy days 3 16.72 4.68 0.017 -0.034 0 (0.003)
Temperature
Maximum temperature 3 12.77 0.74 0.120 0.089 0.046 (0.023)
Mean temperature 3 16.46 4.42 0.019 -0.026 0.031 (0.062)
Minimum temperature 3 16.61 4.58 0.018 -0.031 0.011 (0.036)
Wind
Maximum wind speed 3 14.39 2.35 0.054 0.041 -0.016 (0.011)
Mean wind speed 3 14.84 2.81 0.043 0.026 -0.18 (0.134)
Hail
Hail days 3 12.04 0.00 0.174 0.111 0.076 (0.035)
Joint effects
Hail days + max wind speed 4 13.16 1.12 0.099 0.123
Max temperature + rainy days 4 14.37 2.33 0.054 0.088
Hail days + rainy days 4 14.44 2.40 0.052 0.086
Min temperature + total rainfall 4 15.45 3.41 0.032 0.056
Max daily rainfall + max wind speed 4 15.84 3.81 0.026 0.044
Min temperature + max wind speed 4 17.03 5.00 0.014 0.006
Min temperature + max daily rainfall 4 18.24 6.20 0.008 -0.033
Interactions
Prev. rainfall × max temperature 5 15.71 3.67 0.028 0.100
Min temperature × mean wind speed 5 20.27 8.23 0.003 -0.043
Min temperature × rainy days 5 21.95 9.92 0.001 -0.101
library(dplyr)
library(ggplot2)
library(tidytext)

aic_no_year_plot <- table1_clean |>
  filter(Group != "Baseline" | Model == "Intercept only") |>
  mutate(
    Model_f = reorder_within(Model, `ΔAICc`, Group),
    is_null = Model == "Intercept only"
  )

ggplot(aic_no_year_plot, aes(x = `ΔAICc`, y = Model_f)) +
  geom_vline(xintercept = 2, linetype = "dashed", colour = "grey40") +
  geom_point(aes(size = w, shape = is_null), alpha = 0.85) +
  scale_shape_manual(values = c(`FALSE` = 16, `TRUE` = 17), guide = "none") +
  scale_y_reordered() +
  scale_size_continuous(range = c(2.5, 7)) +
  labs(
    x = expression(Delta*AIC[c]),
    y = NULL,
    size = "Akaike weight",
    title = "Weather-only model support (year excluded)"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text.y = element_text(size = 10)
  )

library(dplyr)
library(ggplot2)
library(tidytext)


aic_with_year_plot <- as.data.frame(aic_effort_with_year) |>
  tibble::rownames_to_column("model_id") |>
  dplyr::rename(
    `ΔAICc` = Delta_AICc,
    w = AICcWt
  ) |>
  dplyr::mutate(
    Model = pretty_model(model_id),
    Model_f = reorder(Model, `ΔAICc`),
    is_null = model_id == "null"
  )

ggplot(aic_with_year_plot, aes(x = `ΔAICc`, y = Model_f)) +
  geom_vline(xintercept = 2, linetype = "dashed", colour = "grey40") +
  geom_point(aes(size = w, shape = is_null), alpha = 0.85) +
  scale_shape_manual(values = c(`FALSE` = 16, `TRUE` = 17), guide = "none") +
  scale_size_continuous(range = c(2.5, 7)) +
  labs(
    x = expression(Delta*AIC[c]),
    y = NULL,
    size = "Akaike weight",
    title = "Weather + year model support"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    axis.text.y = element_text(size = 10)
  )

library(dplyr)

model_group_with_year <- function(m){

  dplyr::case_when(
    m %in% c("year") ~ "Baseline",

    m %in% c("total_rain","rain_days","max_rain","lag_total_rain") ~ "Rain",

    m %in% c("mean_temp","max_temp","min_temp") ~ "Temperature",

    m %in% c("mean_wind","max_wind") ~ "Wind",

    m %in% c("hail_days") ~ "Hail",

    m %in% c(
      "minT_totalR","minT_maxR","minT_maxW","hail_maxW","maxR_maxW",
      "maxT_rainDays","hail_rainDays","maxT_maxR","minT_rainDays"
    ) ~ "Joint effects",

    m %in% c(
      "minT_totalR_x","minT_maxR_x","minT_maxW_x","hail_maxW_x","maxR_maxW_x",
      "maxT_rainDays_x","hail_rainDays_x","maxT_maxR_x","minT_rainDays_x"
    ) ~ "Interactions",

    TRUE ~ "Other"
  )
}
library(dplyr)
library(tibble)
library(officer)
library(flextable)



aic_df <- as.data.frame(aic_effort_no_year)

# model ids
if ("Modnames" %in% names(aic_df)) {
  aic_df <- aic_df |> dplyr::mutate(model_id = Modnames)
} else {
  aic_df <- tibble::rownames_to_column(aic_df, "model_id")
}

table1 <- aic_df |>
  dplyr::rename(
    `ΔAICc` = Delta_AICc,
    w = AICcWt
  ) |>
  dplyr::select(model_id, K, AICc, `ΔAICc`, w) |>
  dplyr::arrange(`ΔAICc`)


get_stats <- function(mod){
  adjr2 <- summary(mod)$adj.r.squared
  if (length(coef(mod)) == 2) {
    cf <- summary(mod)$coefficients
    slope_se <- paste0(round(cf[2,1], 3), " (", round(cf[2,2], 3), ")")
  } else {
    slope_se <- ""
  }
  tibble::tibble(`Adj. R²` = adjr2, `slope (SE)` = slope_se)
}

stats_df <- lapply(table1$model_id, function(mn) get_stats(cand_effort_no_year[[mn]])) |>
  dplyr::bind_rows() |>
  dplyr::mutate(model_id = table1$model_id)

table1 <- table1 |>
  dplyr::left_join(stats_df, by = "model_id") |>
  dplyr::mutate(
    AICc = round(AICc, 2),
    `ΔAICc` = round(`ΔAICc`, 2),
    w = round(w, 3),
    `Adj. R²` = round(`Adj. R²`, 3)
  )


group_order <- c("Baseline","Rain","Temperature","Wind","Hail","Joint effects","Interactions")

model_group <- function(m){
  dplyr::case_when(
    m == "null" ~ "Baseline",
    m %in% c("total_rain","rain_days","max_rain","lag_total_rain") ~ "Rain",
    m %in% c("mean_temp","max_temp","min_temp") ~ "Temperature",
    m %in% c("mean_wind","max_wind") ~ "Wind",
    m %in% c("hail_days") ~ "Hail",
    m %in% c("minT_totalR","minT_maxR","minT_maxW","hail_maxW","maxR_maxW","maxT_rainDays","hail_rainDays") ~ "Joint effects",
    m %in% c("minT_x_rainDays","minT_x_meanWind","lagR_x_maxT") ~ "Interactions",
    TRUE ~ "Joint effects"
  )
}

pretty_model <- function(x){
  dplyr::recode(
    x,
    "null"            = "Intercept only",
    "total_rain"      = "Total rainfall",
    "rain_days"       = "Rainy days",
    "max_rain"        = "Maximum daily rainfall",
    "lag_total_rain"  = "Previous year's total rainfall",
    "mean_temp"       = "Mean temperature",
    "max_temp"        = "Maximum temperature",
    "min_temp"        = "Minimum temperature",
    "mean_wind"       = "Mean wind speed",
    "max_wind"        = "Maximum wind speed",
    "hail_days"       = "Hail days",
    "minT_totalR"     = "Min temperature + total rainfall",
    "minT_maxR"       = "Min temperature + max daily rainfall",
    "minT_maxW"       = "Min temperature + max wind speed",
    "hail_maxW"       = "Hail days + max wind speed",
    "maxR_maxW"       = "Max daily rainfall + max wind speed",
    "maxT_rainDays"   = "Max temperature + rainy days",
    "hail_rainDays"   = "Hail days + rainy days",
    "minT_x_rainDays" = "Min temperature × rainy days",
    "minT_x_meanWind" = "Min temperature × mean wind speed",
    "lagR_x_maxT"     = "Prev. rainfall × max temperature",
    .default = x
  )
}

table1_clean <- table1 |>
  dplyr::mutate(
    Group = model_group(model_id),
    Model = pretty_model(model_id),
    Group = factor(Group, levels = group_order)
  ) |>
  dplyr::arrange(Group, `ΔAICc`) |>
  dplyr::select(Group, Model, K, AICc, `ΔAICc`, w, `Adj. R²`, `slope (SE)`)


table2_raw <- table1_clean
names(table2_raw) <- trimws(names(table2_raw))


u_groups <- levels(droplevels(table2_raw$Group))

out <- bind_rows(lapply(u_groups, function(g){

  header_row <- table2_raw %>%
    filter(Group == g) %>%
    select(-Group) %>%
    slice(1) %>%
    mutate(across(everything(), ~ NA)) %>%
    mutate(Model = as.character(g))

  data_rows <- table2_raw %>%
    filter(Group == g) %>%
    select(-Group)

  header_row$is_group <- TRUE
  data_rows$is_group  <- FALSE

  bind_rows(header_row, data_rows)
}))

group_idx <- which(out$is_group)
out_ft <- out %>% select(-is_group)

ft <- flextable(out_ft) %>%
  font(fontname = "Times New Roman", part = "all") %>%
  fontsize(size = 12, part = "all") %>%
  bold(part = "header") %>%
  bg(part = "header", bg = "#f2f2f2") %>%
  bold(i = group_idx, part = "body") %>%
  bg(i = group_idx, bg = "#e6e6e6", part = "body") %>%
  align(j = 1, align = "left", part = "all") %>%
  align(j = 2:ncol(out_ft), align = "center", part = "all") %>%
  width(j = 1, width = 4.2) %>%
  width(j = 2:ncol(out_ft), width = 1.0) %>%
  autofit()

doc <- read_docx() %>%
  body_add_flextable(ft)

print(doc, target = "Table_1_BreedingEffort.docx")
library(dplyr)
library(ggplot2)
library(forcats)
library(tidytext)   

daic_keep <- 4

effort_bar_df2 <- bind_rows(
  prep_aictab_bar(aic_effort_with_year, "With year"),
  prep_aictab_bar(aic_effort_no_year,   "Without year")
) |>
  group_by(panel) |>
  filter(dAICc <= daic_keep) |>
  mutate(
    
    label_w = tidytext::reorder_within(label, dAICc, panel)

  ) |>
  ungroup()

ggplot(effort_bar_df2, aes(x = label_w, y = dAICc, fill = is_best)) +
  geom_col(width = 0.7) +
  coord_flip() +
  geom_hline(yintercept = 2, linetype = "dashed") +
  facet_wrap(~panel, scales = "free_y") +
  tidytext::scale_x_reordered() +
  scale_fill_manual(values = c(`TRUE` = "steelblue", `FALSE` = "grey75"), guide = "none") +
  scale_y_continuous(limits = c(0, daic_keep), breaks = 0:daic_keep) +
  labs(
    x = NULL,
    y = expression(Delta*AIC[c]),
    title = 
"Objective 1: Model support for breeding effort",
  ) +
  theme_bw() +
  theme(panel.grid = element_blank())


Objective 2: Breeding success

objective2_table <- tribble(
  ~`Hypothesis / justification`, ~Model,

  # ---- Temperature ----
  "**Temp**", "",
  "Extreme heat events cause chick heat stress and dehydration, reducing fledging success",
  "success ~ max_temp + (1 | year)",

  "Cold extremes increase thermoregulatory demand in chicks, increasing mortality risk",
  "success ~ min_temp + (1 | year)",

  # ---- Rain ----
  "**Rain**", "",
  "Prolonged rainfall reduces adult foraging efficiency, limiting food delivery to chicks and reducing breeding success",
  "success ~ rain_days + (1 | year)",

  "Intense rainfall events cause acute nest disturbance and chick exposure, reducing breeding success",
  "success ~ max_rain + (1 | year)",

  "Overall wet years reduce provisioning efficiency across the breeding season, lowering breeding success",
  "success ~ total_rain + (1 | year)",

  # ---- Wind ----
  "**Wind**", "",
  "Extreme winds increase nest exposure and disrupt provisioning, reducing chick survival",
  "success ~ max_wind + (1 | year)",

  # ---- Hail ----
  "**Hail**", "",
  "Severe hail events increase nest destruction and chick mortality, reducing breeding success",
  "success ~ hail_days + (1 | year)",

  # ---- Joint Effects ----
  "**Joint effects**", "",
  "Severe storm exposure increases nest destruction and chick mortality, reducing breeding success",
  "success ~ hail_days + max_wind + (1 | year)",

  "Prolonged wet conditions combined with storm events increase chick exposure and limit provisioning, reducing breeding success",
  "success ~ rain_days + hail_days + (1 | year)",

  "Heat stress combined with prolonged wet conditions reduces chick thermoregulation and food delivery, lowering breeding success",
  "success ~ max_temp + rain_days + (1 | year)",

  # ---- Interactive Effects ----
  "**Interactive effects**", "",
  "The negative effect of heat stress on breeding success is amplified during persistently wet conditions that limit adult foraging",
  "success ~ max_temp * rain_days + (1 | year)",

  "Cold stress on chicks is amplified during extreme wind events due to increased exposure and heat loss, reducing breeding success",
  "success ~ min_temp * max_wind + (1 | year)"
)

kable(
  objective2_table,
  align = c("l", "l"),
  caption = "Objective 2 candidate model set for breeding success. All models are fitted as binomial GLMMs: cbind(success_nests, failed_nests) with a random intercept for year (1 | year)"
)
Objective 2 candidate model set for breeding success. All models are fitted as binomial GLMMs: cbind(success_nests, failed_nests) with a random intercept for year (1 | year)
Hypothesis / justification Model
Temp
Extreme heat events cause chick heat stress and dehydration, reducing fledging success success ~ max_temp + (1 | year)
Cold extremes increase thermoregulatory demand in chicks, increasing mortality risk success ~ min_temp + (1 | year)
Rain
Prolonged rainfall reduces adult foraging efficiency, limiting food delivery to chicks and reducing breeding success success ~ rain_days + (1 | year)
Intense rainfall events cause acute nest disturbance and chick exposure, reducing breeding success success ~ max_rain + (1 | year)
Overall wet years reduce provisioning efficiency across the breeding season, lowering breeding success success ~ total_rain + (1 | year)
Wind
Extreme winds increase nest exposure and disrupt provisioning, reducing chick survival success ~ max_wind + (1 | year)
Hail
Severe hail events increase nest destruction and chick mortality, reducing breeding success success ~ hail_days + (1 | year)
Joint effects
Severe storm exposure increases nest destruction and chick mortality, reducing breeding success success ~ hail_days + max_wind + (1 | year)
Prolonged wet conditions combined with storm events increase chick exposure and limit provisioning, reducing breeding success success ~ rain_days + hail_days + (1 | year)
Heat stress combined with prolonged wet conditions reduces chick thermoregulation and food delivery, lowering breeding success success ~ max_temp + rain_days + (1 | year)
Interactive effects
The negative effect of heat stress on breeding success is amplified during persistently wet conditions that limit adult foraging success ~ max_temp * rain_days + (1 | year)
Cold stress on chicks is amplified during extreme wind events due to increased exposure and heat loss, reducing breeding success success ~ min_temp * max_wind + (1 | year)

success ~ 1 + (1 | year)

library(lme4)

success_df <- success_df |> mutate(year = factor(year))

m0 <- glmer(
  cbind(success_nests, failed_nests) ~ 1 + (1 | year),
  family = binomial,
  data = success_df
)
summary(m0)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: cbind(success_nests, failed_nests) ~ 1 + (1 | year)
   Data: success_df

      AIC       BIC    logLik -2*log(L)  df.resid 
    229.0     232.0    -112.5     225.0        30 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.25016 -0.28770  0.00005  0.48287  1.24098 

Random effects:
 Groups Name        Variance Std.Dev.
 year   (Intercept) 0.1292   0.3594  
Number of obs: 32, groups:  year, 32

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)   
(Intercept)  0.21944    0.07595   2.889  0.00386 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lme4)

success_df <- success_df |> dplyr::mutate(year = factor(year))

# Baseline (null) model

m0_success <- glmer(
  cbind(success_nests, failed_nests) ~ 1 + (1 | year),
  family = binomial,
  data = success_df
)

# Single predictor models

m_succ_maxT <- glmer(
  cbind(success_nests, failed_nests) ~ z_max_temp + (1 | year),
  family = binomial, data = success_df
)

m_succ_minT <- glmer(
  cbind(success_nests, failed_nests) ~ z_min_temp + (1 | year),
  family = binomial, data = success_df
)

m_succ_rainD <- glmer(
  cbind(success_nests, failed_nests) ~ z_rain_days + (1 | year),
  family = binomial, data = success_df
)

m_succ_maxR <- glmer(
  cbind(success_nests, failed_nests) ~ z_max_rain + (1 | year),
  family = binomial, data = success_df
)

m_succ_totR <- glmer(
  cbind(success_nests, failed_nests) ~ z_total_rain + (1 | year),
  family = binomial, data = success_df
)

m_succ_maxW <- glmer(
  cbind(success_nests, failed_nests) ~ z_max_wind + (1 | year),
  family = binomial, data = success_df
)

m_succ_hail <- glmer( 
  cbind(success_nests, failed_nests) ~ z_hail_days + (1 | year),
  family = binomial, data = success_df
)

# Joint effects

m_succ_hailW <- glmer(
  cbind(success_nests, failed_nests) ~ z_hail_days + z_max_wind + (1 | year),
  family = binomial, data = success_df
)

m_succ_rainH <- glmer(
  cbind(success_nests, failed_nests) ~ z_rain_days + z_hail_days + (1 | year),
  family = binomial, data = success_df
)

m_succ_heatR <- glmer(
  cbind(success_nests, failed_nests) ~ z_max_temp + z_rain_days + (1 | year),
  family = binomial, data = success_df
)

# Interactions

m_succ_int_heat_rain <- glmer(
  cbind(success_nests, failed_nests) ~ z_max_temp * z_rain_days + (1 | year),
  family = binomial, data = success_df
)

m_succ_int_cold_wind <- glmer(
  cbind(success_nests, failed_nests) ~ z_min_temp * z_max_wind + (1 | year),
  family = binomial, data = success_df
)
# Candidate set + AIC

library(AICcmodavg)

cand_success <- list(
  null_re      = m0_success,

  max_temp     = m_succ_maxT,
  min_temp     = m_succ_minT,
  rain_days    = m_succ_rainD,
  max_rain     = m_succ_maxR,
  total_rain   = m_succ_totR,
  max_wind     = m_succ_maxW,
  hail_days    = m_succ_hail,

  hail_maxW    = m_succ_hailW,
  rain_hail    = m_succ_rainH,
  maxT_rainD   = m_succ_heatR,

  int_heatRain = m_succ_int_heat_rain,
  int_coldWind = m_succ_int_cold_wind
)

aic_success <- aictab(
  cand.set  = cand_success,
  modnames  = names(cand_success)
)

aic_success

Model selection based on AICc:

             K   AICc Delta_AICc AICcWt Cum.Wt      LL
min_temp     3 226.93       0.00   0.43   0.43 -110.03
null_re      2 229.45       2.52   0.12   0.56 -112.52
hail_days    3 230.45       3.53   0.07   0.63 -111.80
max_temp     3 230.85       3.92   0.06   0.69 -111.99
max_rain     3 231.01       4.09   0.06   0.75 -112.08
rain_days    3 231.61       4.68   0.04   0.79 -112.38
total_rain   3 231.65       4.72   0.04   0.83 -112.40
max_wind     3 231.73       4.81   0.04   0.87 -112.44
int_coldWind 5 232.04       5.11   0.03   0.91 -109.87
maxT_rainD   4 232.10       5.18   0.03   0.94 -111.31
rain_hail    4 232.30       5.38   0.03   0.97 -111.41
hail_maxW    4 232.67       5.74   0.02   0.99 -111.59
int_heatRain 5 234.93       8.01   0.01   1.00 -111.31
vc_year <- function(mod) {
  vc <- as.data.frame(VarCorr(mod))
  vc$vcov[vc$grp == "year"][1]
}

v0     <- vc_year(m0_success)
v_minT <- vc_year(m_succ_minT)

prop_between_year_explained <- (v0 - v_minT) / v0
prop_between_year_explained
[1] 0.2098479

Figures and tables

library(dplyr)
library(tibble)
library(lme4)
library(AICcmodavg)
library(officer)
library(flextable)


stopifnot(exists("aic_success"))
stopifnot(exists("cand_success"))
stopifnot(exists("m0_success"))

aic_df <- as.data.frame(aic_success)


if ("Modnames" %in% names(aic_df)) {
  aic_df <- aic_df %>% mutate(model_id = Modnames)
} else {
  aic_df <- tibble::rownames_to_column(aic_df, "model_id")
}


table_success <- aic_df %>%
  rename(
    `ΔAICc` = Delta_AICc,
    w       = AICcWt
  ) %>%
  select(model_id, K, AICc, `ΔAICc`, w) %>%
  arrange(`ΔAICc`)


vc_year <- function(mod) {
  vc <- as.data.frame(VarCorr(mod))
  vc$vcov[vc$grp == "year"][1]
}

v0 <- vc_year(m0_success)


get_stats_success <- function(mod){
 
  fe <- lme4::fixef(mod)
  if (length(fe) == 2) {
    cf <- summary(mod)$coefficients
    slope_se <- paste0(round(cf[2,1], 3), " (", round(cf[2,2], 3), ")")
  } else {
    slope_se <- ""
  }

  v_mod <- vc_year(mod)
  var_expl <- if (is.finite(v_mod) && is.finite(v0) && v0 > 0) (v0 - v_mod) / v0 else NA_real_

  tibble::tibble(`Variance explained` = var_expl, `slope (SE)` = slope_se)
}

stats_df <- lapply(table_success$model_id, function(mn) get_stats_success(cand_success[[mn]])) %>%
  bind_rows() %>%
  mutate(model_id = table_success$model_id)

table_success <- table_success %>%
  left_join(stats_df, by = "model_id") %>%
  mutate(
    AICc = round(AICc, 2),
    `ΔAICc` = round(`ΔAICc`, 2),
    w = round(w, 3),
    `Variance explained` = round(`Variance explained`, 3)
  )


group_order <- c("Baseline", "Temperature", "Rain", "Wind", "Hail", "Joint effects", "Interactions")

model_group_success <- function(m){
  dplyr::case_when(
    m %in% c("null_re") ~ "Baseline",
    m %in% c("max_temp", "min_temp") ~ "Temperature",
    m %in% c("rain_days", "max_rain", "total_rain") ~ "Rain",
    m %in% c("max_wind") ~ "Wind",
    m %in% c("hail_days") ~ "Hail",
    m %in% c("hail_maxW", "rain_hail", "maxT_rainD") ~ "Joint effects",
    m %in% c("int_heatRain", "int_coldWind") ~ "Interactions",
    TRUE ~ "Joint effects"
  )
}

pretty_model_success <- function(x){
  dplyr::recode(
    x,
    "null_re"      = "Intercept only",
    "max_temp"     = "Maximum temperature",
    "min_temp"     = "Minimum temperature",
    "rain_days"    = "Rainy days",
    "max_rain"     = "Maximum daily rainfall",
    "total_rain"   = "Total rainfall",
    "max_wind"     = "Maximum wind speed",
    "hail_days"    = "Hail days",
    "hail_maxW"    = "Hail days + max wind speed",
    "rain_hail"    = "Rainy days + hail days",
    "maxT_rainD"   = "Max temperature + rainy days",
    "int_heatRain" = "Max temperature × rainy days",
    "int_coldWind" = "Min temperature × max wind speed",
    .default = x
  )
}

table_success_clean <- table_success %>%
  mutate(
    Group = model_group_success(model_id),
    Model = pretty_model_success(model_id),
    Group = factor(Group, levels = group_order)
  ) %>%
  arrange(Group, `ΔAICc`) %>%
  select(Group, Model, K, AICc, `ΔAICc`, w, `Variance explained`, `slope (SE)`)


df <- table_success_clean %>%
  rename(`Deviance explained` = `Variance explained`) %>%
  select(Group, Model, K, AICc, `ΔAICc`, w, `Deviance explained`, `slope (SE)`) %>%
  mutate(Group = factor(Group, levels = c("Baseline","Temperature","Rain","Wind","Hail","Joint effects","Interactions"))) %>%
  arrange(Group, `ΔAICc`)

groups <- levels(droplevels(df$Group))
groups <- groups[groups %in% unique(as.character(df$Group))]

make_group_block <- function(g) {
  header <- df[0, ]
  header[1, ] <- NA
  header$Group <- g
  header$Model <- g
  header$is_group <- TRUE

  rows <- df %>% filter(as.character(Group) == g)
  rows$is_group <- FALSE

  bind_rows(header, rows)
}

df2 <- bind_rows(lapply(groups, make_group_block))
group_rows <- which(df2$is_group)

df_print <- df2 %>% select(-Group, -is_group)

ft <- flextable(df_print) %>%
  font(fontname = "Times New Roman", part = "all") %>%
  fontsize(size = 12, part = "all") %>%
  bold(part = "header") %>%
  bg(part = "header", bg = "#f2f2f2") %>%
  align(j = 1, align = "left", part = "all") %>%
  align(j = 2:ncol(df_print), align = "center", part = "all") %>%
  bg(i = group_rows, bg = "#e6e6e6", part = "body") %>%
  bold(i = group_rows, part = "body") %>%
  autofit()

doc <- read_docx() %>%
  body_add_flextable(ft)

print(doc, target = "Table_3_Breeding_Success.docx")
minT_model <- m_succ_minT


success_df_plot <- success_df |>
  mutate(
    success_rate = success_nests / (success_nests + failed_nests),
    year_num = as.numeric(as.character(year))
  )

yrs <- sort(unique(success_df_plot$year_num[is.finite(success_df_plot$year_num)]))
breaks_5y <- yrs[seq(1, length(yrs), by = 5)]  #

ggplot(success_df_plot, aes(x = year_num, y = success_rate)) +
  geom_point(size = 2, colour = "grey30") +
  geom_smooth(
    method = "lm",
    se = TRUE,
    colour = "grey30",
    fill = "grey85"
  ) +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = breaks_5y) +
  labs(
    x = "Year",
    y = "Breeding success",
    title = "Annual breeding success (proportion of successful nests) 
of White-backed Vultures"
  ) +
  theme_bw() +
  theme(panel.grid = element_blank())
`geom_smooth()` using formula = 'y ~ x'

obs_df <- success_df |>
  mutate(
    success_rate = success_nests / (success_nests + failed_nests),
    se = sqrt(success_rate * (1 - success_rate) /
                (success_nests + failed_nests))
  )
minT_model <- m_succ_minT

pred_df <- success_df |>
  mutate(
    pred = predict(
      minT_model,
      newdata = success_df,
      type = "response",
      re.form = NA
    )
  )
obs_df <- success_df |>
  mutate(
    year_num = as.numeric(gsub("[^0-9]", "", as.character(year))),
    success_rate = success_nests / (success_nests + failed_nests),
    se = sqrt(success_rate * (1 - success_rate) / (success_nests + failed_nests))
  )

minT_model <- m_succ_minT

pred_df <- success_df |>
  mutate(
    year_num = as.numeric(gsub("[^0-9]", "", as.character(year))),
    pred = predict(minT_model, newdata = success_df, type = "response", re.form = NA)
  )


ggplot() +
  geom_point(
    data = obs_df,
    aes(x = year_num, y = success_rate),
    size = 2, shape = 21, fill = "white", colour = "grey30"
  ) +
  geom_errorbar(
    data = obs_df,
    aes(x = year_num,
        ymin = success_rate - se,
        ymax = success_rate + se),
    width = 0, colour = "grey30"
  ) +
  geom_line(
    data = pred_df,
    aes(x = year_num, y = pred),
    linewidth = 1, colour = "black"
  ) +
  scale_y_continuous(limits = c(0, 1)) +
  scale_x_continuous(breaks = seq(1995, 2025, by = 5)) +
  labs(
    x = "Year",
    y = "Breeding success",
    title = "Observed and temperature-predicted breeding success",
    subtitle = "Points show annual observed success (± SE); line shows predictions from the minimum temperature model"
  ) +
  theme_bw() +
  theme(panel.grid = element_blank())

figure shows that years with warmer minimum temperatures tend to have slightly higher breeding success, but temperature alone does not explain most of the year-to-year variation. Given the temperature in each year, this is what the model predicts breeding success should be

succ_clean <- as.data.frame(aic_success)

if (!("model_id" %in% names(succ_clean))) {
  if ("Modnames" %in% names(succ_clean)) succ_clean$model_id <- succ_clean$Modnames
  else succ_clean$model_id <- rownames(succ_clean)
}

if (!("dAICc" %in% names(succ_clean))) {
  if ("Delta_AICc" %in% names(succ_clean)) succ_clean$dAICc <- succ_clean$Delta_AICc
  else if ("Delta.AICc" %in% names(succ_clean)) succ_clean$dAICc <- succ_clean$Delta.AICc
  else stop("Can't find dAICc / Delta_AICc in aic_success.")
}

if (!("label" %in% names(succ_clean))) {
  succ_clean$label <- succ_clean$model_id
}

succ_clean <- succ_clean |>
  dplyr::mutate(
    dAICc_plot = dplyr::if_else(model_id == "min_temp", 0.05, dAICc)
  )
succ_clean <- succ_clean |> 
  mutate(dAICc_plot = if_else(model_id == "min_temp", 0.05, dAICc))

ggplot() +
  geom_col(
    data = succ_clean,
    aes(x = label, y = dAICc_plot),
    width = 0.7,
    fill = "grey75"
  ) +
  geom_col(
    data = succ_clean |>  filter(model_id == "min_temp"),
    aes(x = label, y = dAICc_plot),
    width = 0.7,
    fill = "steelblue"
  ) +
  coord_flip() +
  geom_hline(yintercept = 2, linetype = "dashed") +
  labs(
    title = "Model support for breeding success",
    subtitle = "ΔAICc values; best model visually offset",
    x = NULL,
    y = expression(Delta*AIC[c])
  ) +
  theme_bw() +
  theme(panel.grid = element_blank())

library(ggeffects)
library(dplyr)
library(ggplot2)

pred_minT <- ggpredict(
  m_succ_minT,
  terms = "z_min_temp [all]",
  type = "fixed"
) |>
  as.data.frame() |>
  rename(
    x = x,
    predicted = predicted,
    conf.low = conf.low,
    conf.high = conf.high
  )
You are calculating adjusted predictions on the population-level (i.e.
  `type = "fixed"`) for a *generalized* linear mixed model.
  This may produce biased estimates due to Jensen's inequality. Consider
  setting `bias_correction = TRUE` to correct for this bias.
  See also the documentation of the `bias_correction` argument.
# observed annual success
obs_pts <- success_df |>
  mutate(success_rate = success_nests / (success_nests + failed_nests))

ggplot() +
  geom_point(
    data = obs_pts,
    aes(x = z_min_temp, y = success_rate),
    colour = "#B1BD8C",
    alpha = 0.7,
    size = 2.6
  ) +
  geom_ribbon(
    data = pred_minT,
    aes(x = x, ymin = conf.low, ymax = conf.high),
    fill = "#D5D6D2",
    alpha = 0.4
  ) +
  geom_line(
    data = pred_minT,
    aes(x = x, y = predicted),
    colour = "#D5D6D2",
    linewidth = 1.2
  ) +
  labs(
    x = "Minimum temperature (standardised)",
    y = "Breeding success"
  ) +
  theme_bw(base_family = "Times New Roman") +
  theme(
    panel.grid = element_blank(),
    axis.title = element_text(face = "bold", size = 12),
    axis.text  = element_text(face = "bold", size = 12)
  )

Adding population size to the best supported models:

success_df <- success_df |>
  mutate(z_active_nests = scale(active_nests))
m_succ_pop <- glmer(
  cbind(success_nests, failed_nests) ~ z_active_nests + (1 | year),
  family = binomial,
  data = success_df
)
m_succ_minT_pop <- glmer(
  cbind(success_nests, failed_nests) ~ z_min_temp + z_active_nests + (1 | year),
  family = binomial,
  data = success_df
)
summary(m_succ_pop)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: cbind(success_nests, failed_nests) ~ z_active_nests + (1 | year)
   Data: success_df

      AIC       BIC    logLik -2*log(L)  df.resid 
    224.6     229.0    -109.3     218.6        29 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.5918 -0.3451  0.0927  0.3398  1.2347 

Random effects:
 Groups Name        Variance Std.Dev.
 year   (Intercept) 0.09762  0.3124  
Number of obs: 32, groups:  year, 32

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)     0.23572    0.06936   3.399 0.000677 ***
z_active_nests -0.18646    0.07062  -2.640 0.008286 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr)
z_actv_nsts -0.117
summary(m_succ_minT)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: cbind(success_nests, failed_nests) ~ z_min_temp + (1 | year)
   Data: success_df

      AIC       BIC    logLik -2*log(L)  df.resid 
    226.1     230.5    -110.0     220.1        29 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.2181 -0.3386 -0.1177  0.5078  1.3408 

Random effects:
 Groups Name        Variance Std.Dev.
 year   (Intercept) 0.1021   0.3195  
Number of obs: 32, groups:  year, 32

Fixed effects:
            Estimate Std. Error z value Pr(>|z|)   
(Intercept)  0.21436    0.07017   3.055  0.00225 **
z_min_temp   0.16711    0.07222   2.314  0.02066 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr)
z_min_temp -0.005
summary(m_succ_minT_pop)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: cbind(success_nests, failed_nests) ~ z_min_temp + z_active_nests +  
    (1 | year)
   Data: success_df

      AIC       BIC    logLik -2*log(L)  df.resid 
    218.5     224.4    -105.3     210.5        28 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.67994 -0.44118  0.07867  0.37818  1.53542 

Random effects:
 Groups Name        Variance Std.Dev.
 year   (Intercept) 0.0643   0.2536  
Number of obs: 32, groups:  year, 32

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)     0.23483    0.06137   3.826  0.00013 ***
z_min_temp      0.19152    0.06372   3.006  0.00265 ** 
z_active_nests -0.20533    0.06279  -3.270  0.00108 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) z_mn_t
z_min_temp   0.012       
z_actv_nsts -0.147 -0.125
aictab(list(
  min_temp      = m_succ_minT,
  min_temp_pop  = m_succ_minT_pop,
  pop_only      = m_succ_pop
))

Model selection based on AICc:

             K   AICc Delta_AICc AICcWt Cum.Wt      LL
min_temp_pop 4 220.02       0.00   0.91   0.91 -105.27
pop_only     3 225.45       5.44   0.06   0.97 -109.30
min_temp     3 226.93       6.91   0.03   1.00 -110.03
summary(m_succ_minT_pop)
Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: cbind(success_nests, failed_nests) ~ z_min_temp + z_active_nests +  
    (1 | year)
   Data: success_df

      AIC       BIC    logLik -2*log(L)  df.resid 
    218.5     224.4    -105.3     210.5        28 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.67994 -0.44118  0.07867  0.37818  1.53542 

Random effects:
 Groups Name        Variance Std.Dev.
 year   (Intercept) 0.0643   0.2536  
Number of obs: 32, groups:  year, 32

Fixed effects:
               Estimate Std. Error z value Pr(>|z|)    
(Intercept)     0.23483    0.06137   3.826  0.00013 ***
z_min_temp      0.19152    0.06372   3.006  0.00265 ** 
z_active_nests -0.20533    0.06279  -3.270  0.00108 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) z_mn_t
z_min_temp   0.012       
z_actv_nsts -0.147 -0.125
vc_year <- function(mod) {
  vc <- as.data.frame(VarCorr(mod))
  vc$vcov[vc$grp == "year"][1]
}

v0 <- vc_year(m0_success)
v1 <- vc_year(m_succ_minT_pop)

prop_between_year_explained <- (v0 - v1) / v0
prop_between_year_explained
[1] 0.5023207
v_minT <- vc_year(m_succ_minT)

prop_minT <- (v0 - v_minT) / v0
prop_minT
[1] 0.2098479
v_pop <- vc_year(m_succ_pop)

prop_pop <- (v0 - v_pop) / v0
prop_pop
[1] 0.2443964
library(dplyr)
library(tibble)
library(flextable)
library(officer)

sens_tbl <- tibble(
  Model = c(
    "Minimum temperature",
    "Population size",
    "Minimum temperature + population size"
  ),
  `ΔAICc` = c(6.91, 5.44, 0.00),
  `AIC weight` = c(0.03, 0.06, 0.91),
  `Between-year variance explained` = c(0.21, 0.24, 0.50)
)


ft <- flextable(sens_tbl)

ft <- ft |>
  font(fontname = "Times New Roman", part = "all") |>
  fontsize(size = 12, part = "all") |>
  bold(part = "header") |>
  bg(part = "header", bg = "#f2f2f2") |>
  align(j = 1, align = "left", part = "all") |>
  align(j = 2:ncol(sens_tbl), align = "center", part = "all") |>
  autofit()


doc <- read_docx() |>
  body_add_flextable(ft)

print(doc, target = "Sensitivity_Table.docx")

Objective 3

hist(timing_weather_60$lay_DOY)

qqnorm(timing_weather_60$lay_DOY)
qqline(timing_weather_60$lay_DOY)

objective3_table <- tribble(
  ~`Hypothesis / justification`, ~Model,

  # ---- Temperature (pre-laying cues) ----
  "**Temp (pre-laying)**", "",
  "Short-term warm conditions prior to laying may act as a cue for the initiation of breeding.",
  "lay_DOY ~ mean_temp_60",

  "Cold pre-laying conditions delay breeding due to increased energetic costs",
  "lay_DOY ~ min_temp_60",


  # ---- Rain (pre-laying cues) ----
  "**Rain (pre-laying)**", "",
  "Prolonged wet conditions prior to laying delay breeding by limiting foraging efficiency",
  "lay_DOY ~ rain_days_60",

  "Higher rainfall prior to laying signals improved food availability and advances breeding timing",
  "lay_DOY ~ total_rain_60",

  # ---- Wind (pre-laying flight conditions) ----
  "**Wind (pre-laying)**", "",
  "Persistent windy conditions prior to laying increase flight costs and delay breeding timing",
  "lay_DOY ~ mean_wind_60",

  # ---- Lagged effects (carry-over cues) ----
  "**Lagged effects (previous year)**", "",
  "Higher rainfall in the previous year improves food availability and adult condition entering the breeding season, allowing vultures to initiate breeding earlier",
  "lay_DOY ~ lag_total_rain",

  "Prolonged wet conditions in the previous year may influence adult condition and carry over to affect laying timing",
  "lay_DOY ~ lag_rain_days",

  "Thermal conditions in the previous year influence physiological state entering the breeding season",
  "lay_DOY ~ lag_mean_temp",

  # ---- Joint effects ----
  "**Joint effects**", "",
  "Temperature and rainfall cues prior to laying jointly influence breeding timing",
  "lay_DOY ~ mean_temp_60 + rain_days_60",

  "Carry-over effects from the previous year interact with current pre-laying rainfall cues",
  "lay_DOY ~ lag_total_rain + rain_days_60",

  "Previous-year rainfall and current temperature jointly influence breeding timing",
  "lay_DOY ~ lag_total_rain + mean_temp_60",

  # ---- Interactive effects ----
  "**Interactive effects**", "",
  "The effect of temperature cues on breeding timing depends on pre-laying rainfall conditions",
  "lay_DOY ~ mean_temp_60 * rain_days_60",

  "Carry-over effects from the previous year modify responses to current pre-laying temperature cues",
  "lay_DOY ~ lag_total_rain * mean_temp_60"
)

kable(
  objective3_table,
  align = c("l", "l"),
  caption = "Objective 3 candidate model set for breeding timing (laying date, day of year)."
)
Objective 3 candidate model set for breeding timing (laying date, day of year).
Hypothesis / justification Model
Temp (pre-laying)
Short-term warm conditions prior to laying may act as a cue for the initiation of breeding. lay_DOY ~ mean_temp_60
Cold pre-laying conditions delay breeding due to increased energetic costs lay_DOY ~ min_temp_60
Rain (pre-laying)
Prolonged wet conditions prior to laying delay breeding by limiting foraging efficiency lay_DOY ~ rain_days_60
Higher rainfall prior to laying signals improved food availability and advances breeding timing lay_DOY ~ total_rain_60
Wind (pre-laying)
Persistent windy conditions prior to laying increase flight costs and delay breeding timing lay_DOY ~ mean_wind_60
Lagged effects (previous year)
Higher rainfall in the previous year improves food availability and adult condition entering the breeding season, allowing vultures to initiate breeding earlier lay_DOY ~ lag_total_rain
Prolonged wet conditions in the previous year may influence adult condition and carry over to affect laying timing lay_DOY ~ lag_rain_days
Thermal conditions in the previous year influence physiological state entering the breeding season lay_DOY ~ lag_mean_temp
Joint effects
Temperature and rainfall cues prior to laying jointly influence breeding timing lay_DOY ~ mean_temp_60 + rain_days_60
Carry-over effects from the previous year interact with current pre-laying rainfall cues lay_DOY ~ lag_total_rain + rain_days_60
Previous-year rainfall and current temperature jointly influence breeding timing lay_DOY ~ lag_total_rain + mean_temp_60
Interactive effects
The effect of temperature cues on breeding timing depends on pre-laying rainfall conditions lay_DOY ~ mean_temp_60 * rain_days_60
Carry-over effects from the previous year modify responses to current pre-laying temperature cues lay_DOY ~ lag_total_rain * mean_temp_60
# 60 days prior
timing_weather_60 <- timing_weather_60 |>
  left_join(
    weather_yearly_lag |>
      dplyr::select(
        YEAR,
        lag_total_rain,
        lag_rain_days,
        lag_mean_temp
      ),
    by = c("year" = "YEAR")
  )

# 30 days prior
timing_weather_30 <- timing_weather_30 |>
  left_join(
    weather_yearly_lag |>
      dplyr::select(
        YEAR,
        lag_total_rain,
        lag_rain_days,
        lag_mean_temp
      ),
    by = c("year" = "YEAR")
  )
cand_timing_30 <- list(

  meanT_30   = lm(lay_DOY ~ mean_temp_30, data = timing_weather_30),
  maxT_30    = lm(lay_DOY ~ max_temp_30,  data = timing_weather_30),
  minT_30    = lm(lay_DOY ~ min_temp_30,  data = timing_weather_30),

  rainDays_30 = lm(lay_DOY ~ rain_days_30,  data = timing_weather_30),
  totalR_30   = lm(lay_DOY ~ total_rain_30, data = timing_weather_30),

  meanW_30 = lm(lay_DOY ~ mean_wind_30, data = timing_weather_30),
  maxW_30  = lm(lay_DOY ~ max_wind_30,  data = timing_weather_30),

  # lagged (previous year)
  lagTotR  = lm(lay_DOY ~ lag_total_rain, data = timing_weather_30),
  lagRainD = lm(lay_DOY ~ lag_rain_days,  data = timing_weather_30),
  lagMeanT = lm(lay_DOY ~ lag_mean_temp,  data = timing_weather_30),

  # additive
  temp_rain = lm(lay_DOY ~ mean_temp_30 + rain_days_30, data = timing_weather_30),
  temp_wind = lm(lay_DOY ~ mean_temp_30 + mean_wind_30, data = timing_weather_30),
  rain_wind = lm(lay_DOY ~ rain_days_30 + mean_wind_30, data = timing_weather_30),

  # interactions
  temp_x_rain = lm(lay_DOY ~ mean_temp_30 * rain_days_30, data = timing_weather_30),
  minT_x_wind = lm(lay_DOY ~ min_temp_30 * mean_wind_30, data = timing_weather_30),
  lagR_x_temp = lm(lay_DOY ~ lag_total_rain * mean_temp_30, data = timing_weather_30)
)
aic_timing_30 <- aictab(
  cand.set = cand_timing_30,
  modnames = names(cand_timing_30)
)
cand_timing_60 <- list(

  meanT_60   = lm(lay_DOY ~ mean_temp_60, data = timing_weather_60),
  maxT_60    = lm(lay_DOY ~ max_temp_60,  data = timing_weather_60),
  minT_60    = lm(lay_DOY ~ min_temp_60,  data = timing_weather_60),

  rainDays_60 = lm(lay_DOY ~ rain_days_60,  data = timing_weather_60),
  totalR_60   = lm(lay_DOY ~ total_rain_60, data = timing_weather_60),

  meanW_60 = lm(lay_DOY ~ mean_wind_60, data = timing_weather_60),
  maxW_60  = lm(lay_DOY ~ max_wind_60,  data = timing_weather_60),

  # lagged (previous year)
  lagTotR  = lm(lay_DOY ~ lag_total_rain, data = timing_weather_60),
  lagRainD = lm(lay_DOY ~ lag_rain_days,  data = timing_weather_60),
  lagMeanT = lm(lay_DOY ~ lag_mean_temp,  data = timing_weather_60),

  # additive
  temp_rain = lm(lay_DOY ~ mean_temp_60 + rain_days_60, data = timing_weather_60),
  temp_wind = lm(lay_DOY ~ mean_temp_60 + mean_wind_60, data = timing_weather_60),
  rain_wind = lm(lay_DOY ~ rain_days_60 + mean_wind_60, data = timing_weather_60),

  # interactions
  temp_x_rain = lm(lay_DOY ~ mean_temp_60 * rain_days_60, data = timing_weather_60),
  minT_x_wind = lm(lay_DOY ~ min_temp_60 * mean_wind_60, data = timing_weather_60),
  lagR_x_temp = lm(lay_DOY ~ lag_total_rain * mean_temp_60, data = timing_weather_60)
)
aic_timing_60 <- aictab(
  cand.set = cand_timing_60,
  modnames = names(cand_timing_60)
)
aic_timing_30

Model selection based on AICc:

            K     AICc Delta_AICc AICcWt Cum.Wt       LL
temp_x_rain 5 11336.00       0.00      1      1 -5662.98
temp_rain   4 11349.65      13.65      0      1 -5670.81
lagR_x_temp 5 11373.62      37.62      0      1 -5681.79
minT_x_wind 5 11438.51     102.51      0      1 -5714.24
minT_30     3 11529.94     193.94      0      1 -5761.96
temp_wind   4 11571.91     235.91      0      1 -5781.94
meanT_30    3 11600.33     264.33      0      1 -5797.16
maxT_30     3 11820.41     484.41      0      1 -5907.20
rain_wind   4 12224.48     888.48      0      1 -6108.23
lagTotR     3 12278.29     942.29      0      1 -6136.14
lagRainD    3 12289.62     953.61      0      1 -6141.80
lagMeanT    3 12292.70     956.70      0      1 -6143.34
rainDays_30 3 12360.96    1024.96      0      1 -6177.47
totalR_30   3 12400.54    1064.54      0      1 -6197.26
maxW_30     3 12506.89    1170.89      0      1 -6250.44
meanW_30    3 12516.32    1180.31      0      1 -6255.15
summary(cand_timing_30$temp_x_rain)$r.squared
[1] 0.5628433
aic_timing_60

Model selection based on AICc:

            K     AICc Delta_AICc AICcWt Cum.Wt       LL
temp_x_rain 5 10760.71       0.00    0.9    0.9 -5375.33
temp_rain   4 10765.17       4.46    0.1    1.0 -5378.57
lagR_x_temp 5 10889.01     128.31    0.0    1.0 -5439.49
minT_x_wind 5 10945.22     184.51    0.0    1.0 -5467.59
minT_60     3 10990.38     229.67    0.0    1.0 -5492.18
temp_wind   4 11100.18     339.48    0.0    1.0 -5546.08
meanT_60    3 11112.60     351.89    0.0    1.0 -5553.29
maxT_60     3 11394.57     633.86    0.0    1.0 -5694.28
rain_wind   4 12085.12    1324.41    0.0    1.0 -6038.55
rainDays_60 3 12211.31    1450.61    0.0    1.0 -6102.65
lagTotR     3 12278.29    1517.58    0.0    1.0 -6136.14
totalR_60   3 12282.33    1521.62    0.0    1.0 -6138.15
lagRainD    3 12289.62    1528.91    0.0    1.0 -6141.80
lagMeanT    3 12292.70    1531.99    0.0    1.0 -6143.34
maxW_60     3 12533.72    1773.01    0.0    1.0 -6263.85
meanW_60    3 12538.92    1778.21    0.0    1.0 -6266.45
summary(cand_timing_60$temp_x_rain)$r.squared
[1] 0.7052922
m_year_trend <- lm(lay_DOY ~ year, data = timing_weather_30)
summary(m_year_trend)

Call:
lm(formula = lay_DOY ~ year, data = timing_weather_30)

Residuals:
     Min       1Q   Median       3Q      Max 
-145.506  -10.576   -2.616    7.142  142.283 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 51.56562   99.65756   0.517    0.605
year         0.05049    0.04958   1.018    0.309

Residual standard error: 18.18 on 1458 degrees of freedom
Multiple R-squared:  0.0007109, Adjusted R-squared:  2.55e-05 
F-statistic: 1.037 on 1 and 1458 DF,  p-value: 0.3086
summary(m_year_trend)$r.squared
[1] 0.0007108822
m_year_trend60 <- lm(lay_DOY ~ year, data = timing_weather_60)
summary(m_year_trend60)

Call:
lm(formula = lay_DOY ~ year, data = timing_weather_60)

Residuals:
     Min       1Q   Median       3Q      Max 
-145.506  -10.576   -2.616    7.142  142.283 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 51.56562   99.65756   0.517    0.605
year         0.05049    0.04958   1.018    0.309

Residual standard error: 18.18 on 1458 degrees of freedom
Multiple R-squared:  0.0007109, Adjusted R-squared:  2.55e-05 
F-statistic: 1.037 on 1 and 1458 DF,  p-value: 0.3086

Figures and Tables

library(dplyr)
library(tibble)
library(purrr)
library(broom)
library(AICcmodavg)
library(officer)
library(flextable)

cand_timing_30 <- cand_timing_30[names(cand_timing_30) != "null"]

get_slope_se_single <- function(model, digits = 3){
  tt <- broom::tidy(model) |>
    dplyr::filter(term != "(Intercept)")

  if (nrow(tt) != 1) return("")

  paste0(
    sprintf(paste0("%.", digits, "f"), tt$estimate),
    " (", sprintf(paste0("%.", digits, "f"), tt$std.error), ")"
  )
}


build_obj3_base <- function(cand_list){

  tab <- tibble(
    model_id  = names(cand_list),
    Deviance  = sapply(cand_list, stats::deviance),
    K         = sapply(cand_list, function(m) length(stats::coef(m))),
    AICc      = sapply(cand_list, AICcmodavg::AICc),
    R2        = sapply(cand_list, function(m) summary(m)$r.squared),
    `slope (SE)` = sapply(cand_list, get_slope_se_single)
  ) |>
    arrange(AICc) |>
    mutate(
      `ΔAICc` = AICc - min(AICc),
      w = exp(-0.5 * `ΔAICc`) / sum(exp(-0.5 * `ΔAICc`)),

      Deviance = round(Deviance, 0),
      AICc     = round(AICc, 1),
      `ΔAICc`  = round(`ΔAICc`, 1),
      w        = round(w, 3),
      R2       = round(R2, 3)
    )

  tab
}

model_key <- tibble::tribble(
  ~model_id,      ~Group,               ~Model,
  "meanT_30",     "Temperature",        "Mean temperature (30 days)",
  "maxT_30",      "Temperature",        "Maximum temperature (30 days)",
  "minT_30",      "Temperature",        "Minimum temperature (30 days)",

  "rainDays_30",  "Rain",               "Rainy days (30 days)",
  "totalR_30",    "Rain",               "Total rainfall (30 days)",

  "meanW_30",     "Wind",               "Mean wind speed (30 days)",
  "maxW_30",      "Wind",               "Maximum wind speed (30 days)",

  "lagTotR",      "Lagged effects",     "Previous year's total rainfall",
  "lagRainD",     "Lagged effects",     "Previous year's rainy days",
  "lagMeanT",     "Lagged effects",     "Previous year's mean temperature",

  "temp_rain",    "Joint effects",      "Mean temperature + rainy days",
  "temp_wind",    "Joint effects",      "Mean temperature + mean wind speed",
  "rain_wind",    "Joint effects",      "Rainy days + mean wind speed",

  "temp_x_rain",  "Interactive effects","Mean temperature × rainy days",
  "minT_x_wind",  "Interactive effects","Minimum temperature × mean wind speed",
  "lagR_x_temp",  "Interactive effects","Previous year's rainfall × mean temperature"
)


export_obj3_word_table <- function(cand_list, caption_text = NULL, out_file){

  group_order <- c(
    "Temperature", "Rain", "Wind",
    "Lagged effects", "Joint effects", "Interactive effects", "Other"
  )

  df <- build_obj3_base(cand_list) |>
    left_join(model_key, by = "model_id") |>
    mutate(
      Group = coalesce(Group, "Other"),
      Model = coalesce(Model, model_id),
      Group = factor(Group, levels = group_order)
    ) |>
    arrange(Group, `ΔAICc`) |>
    transmute(
      Group,
      Model,
      Deviance,
      K,
      AICc,
      `ΔAICc`,
      w,
      `` = R2,
      `slope (SE)`
    )

  groups <- levels(droplevels(df$Group))
  groups <- groups[groups %in% unique(as.character(df$Group))]

  make_group_block <- function(g){
    header <- df[0, ]
    header[1, ] <- NA
    header$Group <- g
    header$Model <- g
    header$is_group <- TRUE

    rows <- df |> filter(as.character(Group) == g)
    rows$is_group <- FALSE

    bind_rows(header, rows)
  }

  df2 <- bind_rows(lapply(groups, make_group_block))
  group_rows <- which(df2$is_group)

  df_print <- df2 |> select(-Group, -is_group)

  ft <- flextable(df_print) |>
    font(fontname = "Times New Roman", part = "all") |>
    fontsize(size = 12, part = "all") |>
    bold(part = "header") |>
    bg(part = "header", bg = "#f2f2f2") |>
    align(j = 1, align = "left", part = "all") |>
    align(j = 2:ncol(df_print), align = "center", part = "all") |>
    autofit()

  ft <- ft |> bg(i = group_rows, bg = "#e6e6e6", part = "body") |>
    bold(i = group_rows, part = "body")

  for(r in group_rows){
    ft <- merge_at(ft, i = r, j = 1:ncol(df_print), part = "body")
    ft <- align(ft, i = r, j = 1, align = "left", part = "body")
    ft <- hline(ft, i = r, border = fp_border(width = 1), part = "body")
  }

  doc <- read_docx()
  if(!is.null(caption_text)){
    doc <- doc |> body_add_par(caption_text, style = "Normal")
  }
  doc <- doc |> body_add_flextable(ft)

  print(doc, target = out_file)
}

export_obj3_word_table(
  cand_list = cand_timing_30,
  caption_text = "Table X: Model selection results for breeding timing (laying date, day of year) using 30-day pre-laying weather cues. K = number of parameters; AICc = small-sample AIC; ΔAICc = difference from the best model; w = Akaike weight; R² = coefficient of determination. Slope (SE) is shown for single-predictor models only.",
  out_file = "Table_Breeding_Timing_30day.docx"
)
ggplot(timing_weather_60, aes(x = lay_DOY)) +
  geom_histogram(bins = 15, fill = "grey70", colour = "black") +
  labs(
    x = "Laying date (day of year)",
    y = "Frequency",
    title = "Distribution of laying dates"
  ) +
  theme_bw() +
  theme(panel.grid = element_blank())

timing_weather_30_filt <- timing_weather_30 |>
  dplyr::filter(year >= 1993, year <= 2024)

ggplot(timing_weather_30_filt, aes(x = year, y = lay_DOY)) +
  geom_point(
    size = 1.8,
    colour = "grey30",
    alpha = 0.6
  ) +
  geom_smooth(
    method = "lm",
    se = TRUE,
    colour = "grey",
    linewidth = 1
  ) +
  scale_x_continuous(
    breaks = seq(1995, 2025, by = 5),
    limits = c(1993, 2024)
  ) +
  labs(
    x = "Year",
    y = "Laying date (day of year)",
    title = "Annual variation in laying date at Dronfield"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank()
  )+
  geom_jitter(
  width = 0.2,
  height = 0,
  size = 1.6,
  colour = "grey30",
  alpha = 0.6
)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 46 rows containing missing values or values outside the scale range
(`geom_point()`).

# Figure 3.3: Temperature × rainfall interaction with prediction ribbons

m_int <- cand_timing_30$temp_x_rain

df <- timing_weather_30 |>
  dplyr::filter(year >= 1993, year <= 2024)

# rainfall quantiles
rain_q <- quantile(df$rain_days_30,
                   probs = c(0.25, 0.5, 0.75),
                   na.rm = TRUE)

# prediction grid
newdat <- expand.grid(
  mean_temp_30 = seq(
    min(df$mean_temp_30, na.rm = TRUE),
    max(df$mean_temp_30, na.rm = TRUE),
    length.out = 80
  ),
  rain_days_30 = as.numeric(rain_q)
)

# predictions + SE
pred <- predict(m_int, newdata = newdat, se.fit = TRUE)

newdat$fit <- pred$fit
newdat$lo  <- pred$fit - 1.96 * pred$se.fit
newdat$hi  <- pred$fit + 1.96 * pred$se.fit

newdat$Rain_group <- factor(
  newdat$rain_days_30,
  labels = c("Low rainfall (25%)",
             "Medium rainfall (50%)",
             "High rainfall (75%)")
)

# plot
ggplot(df, aes(x = mean_temp_30, y = lay_DOY)) +
  geom_point(
    colour = "grey75",
    size = 1.6,
    alpha = 0.35
  ) +
  geom_ribbon(
    data = newdat,
    aes(
      x = mean_temp_30,
      ymin = lo,
      ymax = hi,
      group = Rain_group
    ),
    alpha = 0.12,
    inherit.aes = FALSE
  ) +
  geom_line(
    data = newdat,
    aes(
      x = mean_temp_30,
      y = fit,
      linetype = Rain_group
    ),
    linewidth = 1.2,
    inherit.aes = FALSE
  ) +
  labs(
    x = "Mean temperature (30 days pre-laying)",
    y = "Laying date (day of year)",
    linetype = "Rainfall conditions",
    title = "Temperature–rainfall interaction predicts laying date"
  ) +
  theme_bw() +
  theme(
    panel.grid = element_blank(),
    legend.position = "right"
  )

Laying date advanced with increasing pre-laying temperature, but this effect was contingent on rainfall conditions, indicating that thermal cues did not operate independently of broader environmental context.

library(dplyr)
library(ggplot2)
library(forcats)


model_labels <- tibble::tribble(
  ~model_id,      ~label_short,
  "meanT_30",     "Mean T (30d)",
  "maxT_30",      "Max T (30d)",
  "minT_30",      "Min T (30d)",
  "rainDays_30",  "Rain days (30d)",
  "totalR_30",    "Total rain (30d)",
  "meanW_30",     "Mean wind (30d)",
  "maxW_30",      "Max wind (30d)",

  "meanT_60",     "Mean T (60d)",
  "maxT_60",      "Max T (60d)",
  "minT_60",      "Min T (60d)",
  "rainDays_60",  "Rain days (60d)",
  "totalR_60",    "Total rain (60d)",
  "meanW_60",     "Mean wind (60d)",
  "maxW_60",      "Max wind (60d)",

  "lagTotR",      "Lag total rain",
  "lagRainD",     "Lag rain days",
  "lagMeanT",     "Lag mean T",

  "temp_rain",    "Mean T + rain days",
  "temp_wind",    "Mean T + mean wind",
  "rain_wind",    "Rain days + mean wind",

  "temp_x_rain",  "Mean T × rain days",
  "minT_x_wind",  "Min T × mean wind",
  "lagR_x_temp",  "Lag rain × mean T"
)

plot_daic <- function(cand_list, period_label){
  tab <- build_obj3_base(cand_list) |>
    left_join(model_labels, by = "model_id") |>
    mutate(
      label_short = coalesce(label_short, model_id),
      is_best = (`ΔAICc` == min(`ΔAICc`)),
      label_short = fct_reorder(label_short, `ΔAICc`, .desc = FALSE)

    )

  ggplot(tab, aes(x = label_short, y = `ΔAICc`, fill = is_best)) +
    geom_col(width = 0.75) +
    coord_flip() +
    scale_fill_manual(values = c(`TRUE` = "steelblue", `FALSE` = "grey70"), guide = "none") +
    geom_hline(yintercept = 2, linetype = "dashed") +
    labs(
      x = NULL,
      y = expression(Delta*AIC[c]),
      title = paste0("Objective 3: Model support (", period_label, ")"),
      subtitle = "Dashed line = ΔAICc = 2 (models with substantial support)"
    ) +
    theme_bw() +
    theme(panel.grid = element_blank())
}

p_daic_30 <- plot_daic(cand_timing_30, "30-day cues")
p_daic_60 <- plot_daic(cand_timing_60, "60-day cues")

p_daic_30

p_daic_60

library(dplyr)
library(ggplot2)

plot_temp_rain_clean <- function(model, data,
                                 xvar = "mean_temp_30",
                                 zvar = "rain_days_30",
                                 yvar = "lay_DOY",
                                 period_label = "30-day cues",
                                 zoom_y = TRUE){


  qs <- quantile(data[[zvar]], probs = c(0.2, 0.5, 0.8), na.rm = TRUE)

  grid <- expand.grid(
    x = seq(min(data[[xvar]], na.rm = TRUE), max(data[[xvar]], na.rm = TRUE), length.out = 100),
    z = qs
  )
  names(grid) <- c(xvar, zvar)

  grid$pred <- predict(model, newdata = grid)
  grid$rain_level <- factor(
    c("Low rain", "Medium rain", "High rain")[match(grid[[zvar]], qs)],
    levels = c("Low rain", "Medium rain", "High rain")
  )

  p <- ggplot(data, aes(x = .data[[xvar]], y = .data[[yvar]])) +
    geom_point(size = 1.6, alpha = 0.35, colour = "grey25") +
    geom_line(
      data = grid,
      aes(x = .data[[xvar]], y = pred, linetype = rain_level),
      linewidth = 1.1,
      colour = "black"
    ) +
    scale_linetype_manual(values = c("solid", "dashed", "dotdash")) +
    labs(
      title = paste0("Breeding timing  (", period_label, ")"),
      subtitle = "Predicted laying date across temperature at low / medium / high rainfall",
      x = "Mean temperature (30 days prior)",
      y = "Laying date (DOY)",
      linetype = NULL
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      plot.title = element_text(size = 12, face = "bold"),
      plot.subtitle = element_text(size = 9),
      axis.title = element_text(size = 9),
      axis.text = element_text(size = 9),
      legend.position = "bottom",
      legend.box = "horizontal"
    )

  if (zoom_y) {
    yl <- quantile(data[[yvar]], probs = c(0.02, 0.98), na.rm = TRUE)
    p <- p + coord_cartesian(ylim = yl)
  }

  p
}


plot_temp_rain_clean(
  model = cand_timing_30$temp_x_rain,
  data  = timing_weather_30,
  xvar  = "mean_temp_30",
  zvar  = "rain_days_30",
  period_label = "30-day cues",
  zoom_y = TRUE
)
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

library(dplyr)
library(ggplot2)

plot_temp_rain_clean <- function(model, data,
                                 xvar,
                                 zvar,
                                 yvar = "lay_DOY",
                                 period_label = "",
                                 xlab = NULL,
                                 zlab = NULL,
                                 zoom_y = TRUE){

  dd <- data |>
    dplyr::filter(
      !is.na(.data[[xvar]]),
      !is.na(.data[[zvar]]),
      !is.na(.data[[yvar]])
    )

  qs <- quantile(dd[[zvar]], probs = c(0.2, 0.5, 0.8), na.rm = TRUE)

  grid <- expand.grid(
    x = seq(min(dd[[xvar]], na.rm = TRUE), max(dd[[xvar]], na.rm = TRUE), length.out = 100),
    z = qs
  )
  names(grid) <- c(xvar, zvar)

  grid$pred <- predict(model, newdata = grid)

  z_show <- ifelse(is.null(zlab), zvar, zlab)
  grid$level <- factor(
    c("Low", "Medium", "High")[match(grid[[zvar]], qs)],
    levels = c("Low", "Medium", "High")
  )

  p <- ggplot(dd, aes(x = .data[[xvar]], y = .data[[yvar]])) +
    geom_point(size = 1.6, alpha = 0.35, colour = "grey25") +
    geom_line(
      data = grid,
      aes(x = .data[[xvar]], y = pred, linetype = level),
      linewidth = 1.1,
      colour = "black"
    ) +
    scale_linetype_manual(
      values = c("solid", "dashed", "dotdash"),
      labels = paste0(levels(grid$level), " ", z_show)
    ) +
    labs(
      title = paste0("Breeding timing (", period_label, ")"),
      subtitle = "Predicted laying date across temperature at low / medium / high rainfall",
      x = ifelse(is.null(xlab), xvar, xlab),
      y = "Laying date (DOY)",
      linetype = NULL
    ) +
    theme_bw() +
    theme(
      panel.grid = element_blank(),
      plot.title = element_text(size = 12, face = "bold"),
      plot.subtitle = element_text(size = 9),
      axis.title = element_text(size = 9),
      axis.text = element_text(size = 9),
      legend.position = "bottom",
      legend.box = "horizontal"
    )

  if (zoom_y) {
    yl <- quantile(dd[[yvar]], probs = c(0.02, 0.98), na.rm = TRUE)
    p <- p + coord_cartesian(ylim = yl)
  }

  p
}


p_timing_30 <- plot_temp_rain_clean(
  model = cand_timing_30$temp_x_rain,
  data  = timing_weather_30,
  xvar  = "mean_temp_30",
  zvar  = "rain_days_30",
  period_label = "30-day cues",
  xlab = "Mean temperature (30 days prior)",
  zlab = "Rainy days (30 days prior)",
  zoom_y = TRUE
)


p_timing_60 <- plot_temp_rain_clean(
  model = cand_timing_60$temp_x_rain,
  data  = timing_weather_60,
  xvar  = "mean_temp_60",
  zvar  = "rain_days_60",
  period_label = "60-day cues",
  xlab = "Mean temperature (60 days prior)",
  zlab = "Rainy days (60 days prior)",
  zoom_y = TRUE
)

p_timing_30

p_timing_60

Simulation

set.seed(1)

simulate_b <- function(
  n_years = 32,
  nests_per_year = 60,
  beta_WT = -4,      # W -> T 
  beta_WS = 1.0,     # W -> S 
  sigma_T = 8,       # spread of lay dates within a year
  year_sd = 4        # add between-year timing variation
){

  years <- 1:n_years

  # year-level w
  W_y <- rnorm(n_years, 0, 1)

  # unobserved between-yea
  u_y <- rnorm(n_years, 0, year_sd)

  dat <- do.call(rbind, lapply(years, function(y){
    W <- W_y[y]
    u <- u_y[y]

    # true timing 
    T <- 170 + beta_WT * W + u + rnorm(nests_per_year, 0, sigma_T)

    # success depends on weather 
    eta <- 0.5 + beta_WS * W
    pS  <- plogis(eta)
    S   <- rbinom(nests_per_year, 1, pS)

    # observed timing only if success
    Tstar <- ifelse(S == 1, T, NA_real_)

    data.frame(
      year = y,
      W = W,
      T = T,
      S = S,
      Tstar = Tstar
    )
  }))

  fit_obs <- lm(Tstar ~ W, data = dat)

  fit_true <- lm(T ~ W, data = dat)

  list(
    dat = dat,
    beta_hat_obs  = coef(fit_obs)[["W"]],
    beta_hat_true = coef(fit_true)[["W"]],
    fit_obs = fit_obs,
    fit_true = fit_true
  )
}

#run 1
out <- simulate_b(beta_WT = -4, beta_WS = 1.5)
out$beta_hat_true
[1] -4.245206
out$beta_hat_obs
[1] -3.875318
set.seed(3)
runs <- 1000
res <- replicate(runs, {
  o <- simulate_b(beta_WT = -4, beta_WS = 1.5)
  o$beta_hat_obs - o$beta_hat_true
})
quantile(res, c(0.025, 0.5, 0.975))
       2.5%         50%       97.5% 
-1.11120924 -0.04328755  1.06427728 
mean(res)
[1] -0.02463755
library(dplyr)

success_df <- success_df |>
  mutate(year = as.integer(as.character(year)))

timing_weather_30 <- timing_weather_30 |>
  mutate(year = as.integer(year))

timing_weather_30 <- timing_weather_30 |>
  left_join(
    success_df |>
      dplyr::select(year, active_nests) |>
      dplyr::distinct(),
    by = "year"
  ) |>
  mutate(z_active_nests = as.numeric(scale(active_nests)))



# best 30-day model
m_time30_best <- cand_timing_30$temp_x_rain

# add population size
m_time30_best_pop <- lm(
  lay_DOY ~ mean_temp_30 * rain_days_30 + z_active_nests,
  data = timing_weather_30
)

# compare
summary(m_time30_best)

Call:
lm(formula = lay_DOY ~ mean_temp_30 * rain_days_30, data = timing_weather_30)

Residuals:
    Min      1Q  Median      3Q     Max 
-43.962  -6.720  -0.397   4.983 181.913 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               220.63071    2.76546  79.781  < 2e-16 ***
mean_temp_30               -4.42827    0.20443 -21.661  < 2e-16 ***
rain_days_30                0.69308    0.56275   1.232    0.218    
mean_temp_30:rain_days_30  -0.15907    0.04014  -3.963 7.75e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 11.75 on 1455 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.5628,    Adjusted R-squared:  0.5619 
F-statistic: 624.4 on 3 and 1455 DF,  p-value: < 2.2e-16
summary(m_time30_best_pop)

Call:
lm(formula = lay_DOY ~ mean_temp_30 * rain_days_30 + z_active_nests, 
    data = timing_weather_30)

Residuals:
    Min      1Q  Median      3Q     Max 
-42.099  -6.086  -0.918   4.511 179.628 

Coefficients:
                           Estimate Std. Error t value Pr(>|t|)    
(Intercept)               225.32731    2.76165  81.592  < 2e-16 ***
mean_temp_30               -4.74468    0.20343 -23.324  < 2e-16 ***
rain_days_30                0.24935    0.55265   0.451 0.651919    
z_active_nests              2.58245    0.31106   8.302 2.31e-16 ***
mean_temp_30:rain_days_30  -0.13332    0.03935  -3.388 0.000724 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 11.48 on 1454 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.5826,    Adjusted R-squared:  0.5815 
F-statistic: 507.4 on 4 and 1454 DF,  p-value: < 2.2e-16
AICcmodavg::aictab(list(
  best      = m_time30_best,
  best_pop  = m_time30_best_pop
))

Model selection based on AICc:

         K     AICc Delta_AICc AICcWt Cum.Wt       LL
best_pop 6 11270.45       0.00      1      1 -5629.19
best     5 11336.00      65.56      0      1 -5662.98
summary(m_time30_best)$r.squared
[1] 0.5628433
summary(m_time30_best_pop)$r.squared
[1] 0.5826283
library(dplyr)
library(tibble)
library(AICcmodavg)
library(flextable)
library(officer)


timing_weather_30 <- timing_weather_30 |>
  mutate(year = as.integer(year))

success_df <- success_df |>
  mutate(year = as.integer(as.character(year)))


# candidate column names 
cand_cols <- c("active_nests", "ActiveNests", "n_active_nests", "active", "nests", "n_nests")

# CASE A: timing_weather_30 already has active_nests
if ("active_nests" %in% names(timing_weather_30)) {

  timing_weather_30 <- timing_weather_30 |>
    mutate(z_active_nests = as.numeric(scale(.data[["active_nests"]])))

} else {

  # find a nest column in success_df
  pop_col <- intersect(cand_cols, names(success_df))[1]

  if (is.na(pop_col) || length(pop_col) == 0) {
    stop(
      "Couldn't find an active-nests / population-size column. ",
      "Looked for: ", paste(cand_cols, collapse = ", "),
      ". Available in success_df: ", paste(names(success_df), collapse = ", ")
    )
  }

  success_pop <- success_df |>
    select(year, !!rlang::sym(pop_col)) |>
    distinct() |>
    rename(active_nests = !!rlang::sym(pop_col))

  timing_weather_30 <- timing_weather_30 |>
    left_join(success_pop, by = "year") |>
    mutate(z_active_nests = as.numeric(scale(.data[["active_nests"]])))
}


if (all(is.na(timing_weather_30$z_active_nests))) {
  stop("z_active_nests is all NA after join/scale — check that year values match between datasets.")
}

m_time30_best <- cand_timing_30$temp_x_rain

m_time30_pop <- lm(
  lay_DOY ~ z_active_nests,
  data = timing_weather_30
)

m_time30_best_pop <- lm(
  lay_DOY ~ mean_temp_30 * rain_days_30 + z_active_nests,
  data = timing_weather_30
)


sens_tbl <- tibble(
  Model = c(
    "Mean temperature × rainy days",
    "Population size",
    "Mean temperature × rainy days + population size"
  ),
  AICc = c(
    AICc(m_time30_best),
    AICc(m_time30_pop),
    AICc(m_time30_best_pop)
  ),
  R2 = c(
    summary(m_time30_best)$r.squared,
    summary(m_time30_pop)$r.squared,
    summary(m_time30_best_pop)$r.squared
  )
) |>
  mutate(
    `ΔAICc` = AICc - min(AICc),
    w = exp(-0.5 * `ΔAICc`) / sum(exp(-0.5 * `ΔAICc`)),
    `ΔAICc` = round(`ΔAICc`, 2),
    w = round(w, 2),
    R2 = round(R2, 2)
  ) |>
  select(Model, `ΔAICc`, w, `` = R2)

ft <- flextable(sens_tbl) |>
  font(fontname = "Times New Roman", part = "all") |>
  fontsize(size = 12, part = "all") |>
  bold(part = "header") |>
  bg(part = "header", bg = "#f2f2f2") |>
  align(j = 1, align = "left", part = "all") |>
  align(j = 2:ncol(sens_tbl), align = "center", part = "all") |>
  autofit()

doc <- read_docx() |>
  body_add_par(
    "Table X. assessing whether breeding population size improves model fit when added to the best-supported weather-based timing model (30-day pre-laying cues). ΔAICc values are relative to the best-supported model.",
    style = "Normal"
  ) |>
  body_add_flextable(ft)

print(doc, target = "Table_Obj3_Timing_Population_Sensitivity.docx")
library(dplyr)
library(ggplot2)

newdat <- tibble(
  z_active_nests = seq(
    min(timing_weather_30$z_active_nests, na.rm = TRUE),
    max(timing_weather_30$z_active_nests, na.rm = TRUE),
    length.out = 120
  ),
  mean_temp_30 = mean(timing_weather_30$mean_temp_30, na.rm = TRUE),
  rain_days_30 = mean(timing_weather_30$rain_days_30, na.rm = TRUE)
)

pred <- predict(m_time30_best_pop, newdata = newdat, se.fit = TRUE)
newdat <- newdat |>
  mutate(
    fit = pred$fit,
    lo  = pred$fit - 1.96 * pred$se.fit,
    hi  = pred$fit + 1.96 * pred$se.fit
  )


ggplot(timing_weather_30, aes(x = z_active_nests, y = lay_DOY)) +
  geom_point(
    colour = "grey60",
    alpha = 0.35,
    size = 1.6
  ) +
  geom_ribbon(
    data = newdat,
    aes(x = z_active_nests, ymin = lo, ymax = hi),
    inherit.aes = FALSE,
    alpha = 0.15
  ) +
  geom_line(
    data = newdat,
    aes(x = z_active_nests, y = fit),
    inherit.aes = FALSE,
    linewidth = 1.3,
    colour = "black"
  ) +
  labs(
    x = "Breeding population size (standardised)",
    y = "Laying date (day of year)",
    title = "Population size effect on laying date",
    subtitle = "Predictions from the best-supported weather model (30-day cues);\nmean temperature and rainy days held at their mean values"
  ) +
  theme_bw() +
  theme(panel.grid = element_blank())
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_point()`).

#install.packages("ggdag")
#install.packages("dagitty")
#install.packages(rsvg)


library(ggdag)
library(dagitty)
library(DiagrammeR)
library(rsvg)
Linking to librsvg 2.61.0
library(DiagrammeRsvg)

Objective 3: Breeding Timing - missing data

Scenario A

library(DiagrammeR)
library(DiagrammeRsvg)
library(knitr)

timing_simple_dag <- grViz("
digraph timing_simple {
  graph [
    layout = neato,
    splines = false,
    overlap = false
  ]

  node [
    fontname = Helvetica,
    fontsize = 22,
    penwidth = 2,
    margin = 0.25
  ]

  edge [
    penwidth = 1.8,
    arrowsize = 0.55
  ]

  W     [label = 'W',  shape = box,    pos = '1.4,1.5!']

  T [label = 'T',
     shape = circle,
     fixedsize = true,
     width = 1,
     pos = '2.8,1.5!'
  ]

  S     [label = 'S',  shape = box,    pos = '1.4,0!']
  Tstar [label = 'T*', shape = box,    pos = '2.8,0!']

  W -> T
  T -> Tstar
  S -> Tstar
}
")

svg_txt <- DiagrammeRsvg::export_svg(timing_simple_dag)

out_file <- file.path(tempdir(), "timing_simple_dag.svg")
writeLines(svg_txt, out_file)

print(knitr::include_graphics(out_file))
[1] "../../../../../private/var/folders/x_/3tc7qr7d64g3460z89m23y540000gn/T/RtmpKuB57E/timing_simple_dag.svg"
attr(,"class")
[1] "knit_image_paths" "knit_asis"       

In this scenario, weather (W) does not affect breeding success (S), meaning that success is effectively random with respect to the weather variable used to explain breeding timing. As a result, success does not act as a confounder, and analysing observed timing (T*) yields the correct inference about the effect of weather on timing. This situation is plausible if breeding success is driven mainly by conditions later in the breeding season, rather than by the pre-breeding weather cues that influence the initiation of breeding.

Scenario B

library(DiagrammeR)
library(DiagrammeRsvg)
library(knitr)

timing_B_dag <- grViz("
digraph timing_B {
  graph [
    layout = neato,
    splines = false,
    overlap = false
  ]

  node [
    fontname = Helvetica,
    fontsize = 22,
    penwidth = 2,
    margin = 0.25
  ]

  edge [
    penwidth = 1.8,
    arrowsize = 0.55
  ]

  # ---- nodes (square layout) ----
  W     [label = 'W',  shape = box,    pos = '1.0,1.5!']

  T [label = 'T',
     shape = circle,
     fixedsize = true,
     width = 1,
     pos = '2.8,1.5!'
  ]

  S     [label = 'S',  shape = box,    pos = '1.0,0!']
  Tstar [label = 'T*', shape = box,    pos = '2.8,0!']

  # ---- arrows (all straight) ----
  W -> T
  W -> S
  T -> Tstar
  S -> Tstar
}
")

svg_txt <- DiagrammeRsvg::export_svg(timing_B_dag)
out_file <- file.path(tempdir(), "timing_B_dag.svg")
writeLines(svg_txt, out_file)

knitr::include_graphics(out_file)

print(knitr::include_graphics(out_file))
[1] "../../../../../private/var/folders/x_/3tc7qr7d64g3460z89m23y540000gn/T/RtmpKuB57E/timing_B_dag.svg"
attr(,"class")
[1] "knit_image_paths" "knit_asis"       

In scenario (b), the same weather variable (W) influences both breeding timing (T) and breeding success (S), but timing itself does not causally affect success. In this case, success is associated with weather, and observed timing (T^) is therefore indirectly linked to weather through both pathways. However, because success is not a mediator of the weather–timing relationship, analysing T* can still recover the underlying effect of weather on timing, particularly when effects are approximately linear. The simulation results suggest that under this structure, any bias introduced by observing timing only for successful nests is small and unlikely to alter biological interpretation.

Scenario C

library(DiagrammeR)
library(DiagrammeRsvg)
library(knitr)

timing_C_dag <- grViz("
digraph timing_C {
  graph [
    layout = neato,
    splines = false,
    overlap = false
  ]

  node [
    fontname = Helvetica,
    fontsize = 22,
    penwidth = 2,
    margin = 0.25
  ]

  edge [
    penwidth = 1.8,
    arrowsize = 0.55
  ]

  # ---- nodes ----
  W     [label = 'W',  shape = box,    pos = '1.0,1.5!']

  T [label = 'T',
     shape = circle,
     fixedsize = true,
     width = 1,
     pos = '2.8,1.5!'
  ]

  S     [label = 'S',  shape = box,    pos = '1.0,0!']
  Tstar [label = 'T*', shape = box,    pos = '2.8,0!']

  # ---- arrows ----
  W -> T
  T -> Tstar
  S -> Tstar
  T -> S
}
")

svg_txt <- DiagrammeRsvg::export_svg(timing_C_dag)
out_file <- file.path(tempdir(), "timing_C_dag.svg")
writeLines(svg_txt, out_file)

knitr::include_graphics(out_file)

In scenario (c), breeding timing directly influences breeding success, in addition to weather affecting timing. Under this structure, it is no longer possible to disentangle whether an apparent effect of weather on observed timing reflects a direct causal effect on breeding initiation or an indirect effect mediated through success. In this case, inference is limited to the timing of successful nests only. Although this scenario may seem plausible at first, it is less clear at the interannual scale considered here, as selection on early or late breeding within a season does not necessarily imply that years with earlier or later average breeding are associated with higher or lower overall breeding success.

Scenario D

library(DiagrammeR)
library(DiagrammeRsvg)
library(knitr)

timing_D_dag <- grViz("
digraph timing_D {
  graph [
    layout = neato,
    splines = false,
    overlap = false
  ]

  node [
    fontname = Helvetica,
    fontsize = 22,
    penwidth = 2,
    margin = 0.25
  ]

  edge [
    penwidth = 1.8,
    arrowsize = 0.55
  ]

  # ---- nodes ----
  W     [label = 'W',  shape = box,    pos = '1.0,1.5!']

  T [label = 'T',
     shape = circle, 
     fixedsize = true,
     width = 1,
     pos = '2.8,1.5!'
  ]

  S     [label = 'S',  shape = box,    pos = '1.0,0!']
  Tstar [label = 'T*', shape = box,    pos = '2.8,0!']

  X [label = 'X',
     shape = circle,
     fixedsize = true,
     width = 0.9,
     pos = '2.0,0.75!'
  ]

  # ---- arrows ----
  W -> T
  T -> Tstar
  S -> Tstar

  X -> T
  X -> S
}
")

svg_txt <- DiagrammeRsvg::export_svg(timing_D_dag)
out_file <- file.path(tempdir(), "timing_D_dag.svg")
writeLines(svg_txt, out_file)

knitr::include_graphics(out_file)

In scenario (d), an unobserved variable (X), representing colony state or population size, influences both breeding timing (T) and breeding success (S). In this case, population-level processes such as density dependence, food competition, or overall adult condition could shift both when breeding is initiated and the likelihood of breeding success. Because X affects both timing and success, it acts as a confounder, and the effect of weather on observed timing (T*) cannot be cleanly isolated without accounting for this shared influence. If a suitable proxy for colony size or population state were available, including it in the analysis could help reduce this source of bias; otherwise, inference about causal pathways remains uncertain.


Objective 1: Breeding effort

Scenario A

library(DiagrammeR)
library(DiagrammeRsvg)
library(knitr)

trend_confounded_dag <- grViz("
digraph trend_confounded {
  graph [
    layout = neato,
    splines = false,
    overlap = false
  ]

  node [
    fontname = Helvetica,
    fontsize = 22,
    penwidth = 2,
    shape = box,
    margin = 0.25
  ]

  edge [
    penwidth = 1.8,
    arrowsize = 0.55
  ]

  # ---- nodes ----
  Y [label = 'Y', pos = '2.5,2!']
  W [label = 'W', pos = '1.2,0!']
  B [label = 'B', pos = '4.0,0!']

  # ---- arrows ----
  Y -> W
  Y -> B
  W -> B
}
")

svg_txt <- DiagrammeRsvg::export_svg(trend_confounded_dag)
out_file <- file.path(tempdir(), "trend_confounded_dag.svg")
writeLines(svg_txt, out_file)

knitr::include_graphics(out_file)

in this scenario, the temporal trend (Y) directly influences both weather (W) and breeding effort (B). Because Y affects both variables, it acts as a confounder of the weather–breeding relationship. To estimate the direct effect of weather on breeding effort, it is therefore necessary to control for Y in the analysis.

Scenario B

library(DiagrammeR)
library(DiagrammeRsvg)
library(knitr)

trend_W_B_dag <- grViz("
digraph trend_W_B {
  graph [
    layout = neato,
    splines = false,
    overlap = false
  ]
 
  node [
    fontname = Helvetica,
    fontsize = 22,
    penwidth = 2,
    shape = box,
    margin = 0.25
  ]

  edge [
    penwidth = 1.8,
    arrowsize = 0.55
  ]

  # ---- nodes (match your sketch) ----
  Y [label = 'Y', pos = '2.4,2!']
  W [label = 'W', pos = '1.2,0!']
  B [label = 'B', pos = '4.0,0!']

  # ---- arrows ----
  Y -> W
  W -> B
}
")

svg_txt <- DiagrammeRsvg::export_svg(trend_W_B_dag)
out_file <- file.path(tempdir(), "trend_W_B_dag.svg")
writeLines(svg_txt, out_file)

knitr::include_graphics(out_file)

In this scenario, the temporal trend (Y) influences the weather variable (W), which in turn affects breeding (B). There is no direct effect of the trend on breeding, meaning that Y is not a confounder of the weather–breeding relationship. As a result, controlling for Y is unnecessary and could obscure the effect of interest, since the influence of Y on breeding operates entirely through weather.

Scenario c

library(DiagrammeR)
library(DiagrammeRsvg)
library(knitr)

trend_mediator_dag <- grViz("
digraph trend_mediator {
  graph [
    layout = neato,
    splines = false,
    overlap = false
  ]

  node [
    fontname = Helvetica,
    fontsize = 22,
    penwidth = 2,
    shape = box,
    margin = 0.25
  ]

  edge [
    penwidth = 1.8,
    arrowsize = 0.55
  ]

  # ---- nodes (straight line layout) ----
  W [label = 'W', pos = '0,0!']
  Y [label = 'Y', pos = '3,0!']
  B [label = 'B', pos = '6,0!']

  # ---- arrows ----
  W -> Y
  Y -> B
}
")

svg_txt <- DiagrammeRsvg::export_svg(trend_mediator_dag)
out_file <- file.path(tempdir(), "trend_mediator_dag.svg")
writeLines(svg_txt, out_file)

knitr::include_graphics(out_file)

In this scenario, the temporal trend (Y) acts as a mediator of the weather effect on breeding effort, with weather influencing Y and Y in turn affecting breeding. Because Y lies on the causal pathway between weather and breeding, controlling for Y would block part of the effect of interest. To estimate the total effect of weather on breeding effort, Y should therefore not be included as a control variable.